gitextract_h_rw1s1r/

├── .clang-format
├── .flake8
├── .gitconfig
├── .github/
│   ├── CODE_OF_CONDUCT.md
│   ├── CONTRIBUTING.md
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug-report-s2t.md
│   │   ├── bug-report-tts.md
│   │   ├── feature-request.md
│   │   ├── others.md
│   │   └── question.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── stale.yml
├── .gitignore
├── .mergify.yml
├── .pre-commit-config.yaml
├── .pre-commit-hooks/
│   ├── clang-format.hook
│   └── copyright-check.hook
├── .readthedocs.yml
├── .style.yapf
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── README_cn.md
├── audio/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── cmake/
│   │   ├── FindGFortranLibs.cmake
│   │   ├── external/
│   │   │   └── openblas.cmake
│   │   ├── pybind.cmake
│   │   └── summary.cmake
│   ├── paddleaudio/
│   │   ├── CMakeLists.txt
│   │   ├── __init__.py
│   │   ├── _extension.py
│   │   ├── _internal/
│   │   │   ├── __init__.py
│   │   │   └── module_utils.py
│   │   ├── backends/
│   │   │   ├── __init__.py
│   │   │   ├── common.py
│   │   │   ├── no_backend.py
│   │   │   ├── soundfile_backend.py
│   │   │   ├── sox_io_backend.py
│   │   │   └── utils.py
│   │   ├── compliance/
│   │   │   ├── __init__.py
│   │   │   ├── kaldi.py
│   │   │   └── librosa.py
│   │   ├── datasets/
│   │   │   ├── __init__.py
│   │   │   ├── dataset.py
│   │   │   ├── esc50.py
│   │   │   ├── gtzan.py
│   │   │   ├── hey_snips.py
│   │   │   ├── rirs_noises.py
│   │   │   ├── tess.py
│   │   │   ├── urban_sound.py
│   │   │   └── voxceleb.py
│   │   ├── features/
│   │   │   ├── __init__.py
│   │   │   └── layers.py
│   │   ├── functional/
│   │   │   ├── __init__.py
│   │   │   ├── functional.py
│   │   │   └── window.py
│   │   ├── kaldi/
│   │   │   ├── __init__.py
│   │   │   └── kaldi.py
│   │   ├── metric/
│   │   │   ├── __init__.py
│   │   │   └── eer.py
│   │   ├── sox_effects/
│   │   │   ├── __init__.py
│   │   │   └── sox_effects.py
│   │   ├── src/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── optional/
│   │   │   │   ├── COPYING
│   │   │   │   └── optional.hpp
│   │   │   ├── pybind/
│   │   │   │   ├── kaldi/
│   │   │   │   │   ├── feature_common.h
│   │   │   │   │   ├── feature_common_inl.h
│   │   │   │   │   ├── kaldi_feature.cc
│   │   │   │   │   ├── kaldi_feature.h
│   │   │   │   │   ├── kaldi_feature_wrapper.cc
│   │   │   │   │   └── kaldi_feature_wrapper.h
│   │   │   │   ├── pybind.cpp
│   │   │   │   └── sox/
│   │   │   │       ├── effects.cpp
│   │   │   │       ├── effects.h
│   │   │   │       ├── effects_chain.cpp
│   │   │   │       ├── effects_chain.h
│   │   │   │       ├── io.cpp
│   │   │   │       ├── io.h
│   │   │   │       ├── types.cpp
│   │   │   │       ├── types.h
│   │   │   │       ├── utils.cpp
│   │   │   │       └── utils.h
│   │   │   └── utils.cpp
│   │   ├── third_party/
│   │   │   ├── .gitignore
│   │   │   ├── CMakeLists.txt
│   │   │   ├── kaldi-native-fbank/
│   │   │   │   └── csrc/
│   │   │   │       ├── CMakeLists.txt
│   │   │   │       ├── feature-fbank.cc
│   │   │   │       ├── feature-fbank.h
│   │   │   │       ├── feature-functions.cc
│   │   │   │       ├── feature-functions.h
│   │   │   │       ├── feature-window.cc
│   │   │   │       ├── feature-window.h
│   │   │   │       ├── fftsg.c
│   │   │   │       ├── log.cc
│   │   │   │       ├── log.h
│   │   │   │       ├── mel-computations.cc
│   │   │   │       ├── mel-computations.h
│   │   │   │       ├── rfft.cc
│   │   │   │       └── rfft.h
│   │   │   ├── patches/
│   │   │   │   ├── config.guess
│   │   │   │   ├── config.sub
│   │   │   │   ├── libmad.patch
│   │   │   │   └── sox.patch
│   │   │   └── sox/
│   │   │       └── CMakeLists.txt
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── download.py
│   │       ├── env.py
│   │       ├── error.py
│   │       ├── log.py
│   │       ├── numeric.py
│   │       ├── sox_utils.py
│   │       ├── tensor_utils.py
│   │       └── time.py
│   ├── setup.py
│   ├── tests/
│   │   ├── backends/
│   │   │   ├── base.py
│   │   │   ├── common.py
│   │   │   ├── soundfile/
│   │   │   │   ├── base.py
│   │   │   │   ├── common.py
│   │   │   │   ├── info_test.py
│   │   │   │   ├── load_test.py
│   │   │   │   ├── save_test.py
│   │   │   │   └── test_io.py
│   │   │   └── sox_io/
│   │   │       ├── common.py
│   │   │       ├── info_test.py
│   │   │       ├── load_test.py
│   │   │       ├── save_test.py
│   │   │       ├── smoke_test.py
│   │   │       ├── sox_effect_test.py
│   │   │       └── sox_effect_test_args.jsonl
│   │   ├── benchmark/
│   │   │   ├── README.md
│   │   │   ├── log_melspectrogram.py
│   │   │   ├── melspectrogram.py
│   │   │   └── mfcc.py
│   │   ├── common_utils/
│   │   │   ├── __init__.py
│   │   │   ├── case_utils.py
│   │   │   ├── data_utils.py
│   │   │   ├── parameterized_utils.py
│   │   │   ├── sox_utils.py
│   │   │   └── wav_utils.py
│   │   └── features/
│   │       ├── __init__.py
│   │       ├── base.py
│   │       ├── test_istft.py
│   │       ├── test_kaldi.py
│   │       ├── test_kaldi_feat.py
│   │       ├── test_librosa.py
│   │       ├── test_log_melspectrogram.py
│   │       ├── test_spectrogram.py
│   │       ├── test_stft.py
│   │       └── testdata/
│   │           ├── fbank_feat.ark
│   │           ├── fbank_feat_txt.ark
│   │           ├── pitch_feat.ark
│   │           └── pitch_feat_txt.ark
│   └── tools/
│       └── setup_helpers/
│           ├── __init__.py
│           └── extension.py
├── dataset/
│   ├── aishell/
│   │   ├── .gitignore
│   │   └── aishell.py
│   ├── aishell3/
│   │   └── README.md
│   ├── chime3_background/
│   │   └── chime3_background.py
│   ├── gigaspeech/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── gigaspeech.py
│   │   └── run.sh
│   ├── librispeech/
│   │   ├── .gitignore
│   │   └── librispeech.py
│   ├── magicdata/
│   │   └── README.md
│   ├── mini_librispeech/
│   │   ├── .gitignore
│   │   └── mini_librispeech.py
│   ├── multi_cn/
│   │   └── README.md
│   ├── primewords/
│   │   └── README.md
│   ├── rir_noise/
│   │   ├── .gitignore
│   │   └── rir_noise.py
│   ├── st-cmds/
│   │   └── README.md
│   ├── tal_cs/
│   │   ├── README.md
│   │   └── tal_cs.py
│   ├── ted_en_zh/
│   │   ├── .gitignore
│   │   └── ted_en_zh.py
│   ├── thchs30/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   └── thchs30.py
│   ├── timit/
│   │   ├── .gitignore
│   │   ├── timit.py
│   │   └── timit_kaldi_standard_split.py
│   ├── voxceleb/
│   │   ├── README.md
│   │   ├── voxceleb1.py
│   │   └── voxceleb2.py
│   └── voxforge/
│       ├── run_data.sh
│       └── voxforge.py
├── demos/
│   ├── README.md
│   ├── README_cn.md
│   ├── TTSAndroid/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── baidu/
│   │   │       │               └── paddle/
│   │   │       │                   └── lite/
│   │   │       │                       └── demo/
│   │   │       │                           └── tts/
│   │   │       │                               └── ExampleInstrumentedTest.java
│   │   │       ├── main/
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── baidu/
│   │   │       │   │           └── paddle/
│   │   │       │   │               └── lite/
│   │   │       │   │                   └── demo/
│   │   │       │   │                       └── tts/
│   │   │       │   │                           ├── AppCompatPreferenceActivity.java
│   │   │       │   │                           ├── MainActivity.java
│   │   │       │   │                           ├── Predictor.java
│   │   │       │   │                           ├── SettingsActivity.java
│   │   │       │   │                           └── Utils.java
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   └── button_drawable.xml
│   │   │       │       ├── layout/
│   │   │       │       │   └── activity_main.xml
│   │   │       │       ├── menu/
│   │   │       │       │   └── menu_action_options.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── arrays.xml
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── styles.xml
│   │   │       │       └── xml/
│   │   │       │           └── settings.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── baidu/
│   │   │                       └── paddle/
│   │   │                           └── lite/
│   │   │                               └── demo/
│   │   │                                   └── tts/
│   │   │                                       └── ExampleUnitTest.java
│   │   ├── build.gradle
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       ├── gradle-wrapper.jar
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle
│   ├── TTSArmLinux/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── build.sh
│   │   ├── clean.sh
│   │   ├── config.sh
│   │   ├── download.sh
│   │   ├── front.conf
│   │   ├── run.sh
│   │   └── src/
│   │       ├── CMakeLists.txt
│   │       ├── Predictor.hpp
│   │       └── main.cc
│   ├── TTSCppFrontend/
│   │   ├── .gitignore
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── build-depends.sh
│   │   ├── build.sh
│   │   ├── clean.sh
│   │   ├── download.sh
│   │   ├── front_demo/
│   │   │   ├── front.conf
│   │   │   ├── front_demo.cpp
│   │   │   └── gentools/
│   │   │       ├── gen_dict_paddlespeech.py
│   │   │       ├── genid.py
│   │   │       └── word2phones.py
│   │   ├── run_front_demo.sh
│   │   ├── src/
│   │   │   ├── base/
│   │   │   │   ├── type_conv.cpp
│   │   │   │   └── type_conv.h
│   │   │   └── front/
│   │   │       ├── front_interface.cpp
│   │   │       ├── front_interface.h
│   │   │       ├── text_normalize.cpp
│   │   │       └── text_normalize.h
│   │   └── third-party/
│   │       └── CMakeLists.txt
│   ├── asr_deployment/
│   │   ├── README.md
│   │   └── README_cn.md
│   ├── audio_content_search/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── acs_clinet.py
│   │   ├── conf/
│   │   │   ├── acs_application.yaml
│   │   │   ├── words.txt
│   │   │   ├── ws_conformer_application.yaml
│   │   │   └── ws_conformer_wenetspeech_application.yaml
│   │   ├── requirements.txt
│   │   ├── run.sh
│   │   └── streaming_asr_server.py
│   ├── audio_searching/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── docker-compose.yaml
│   │   ├── requirements.txt
│   │   └── src/
│   │       ├── audio_search.py
│   │       ├── config.py
│   │       ├── encode.py
│   │       ├── logs.py
│   │       ├── milvus_helpers.py
│   │       ├── mysql_helpers.py
│   │       ├── operations/
│   │       │   ├── __init__.py
│   │       │   ├── count.py
│   │       │   ├── drop.py
│   │       │   ├── load.py
│   │       │   └── search.py
│   │       ├── test_audio_search.py
│   │       ├── test_vpr_search.py
│   │       └── vpr_search.py
│   ├── audio_tagging/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── automatic_video_subtitiles/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── recognize.py
│   │   └── run.sh
│   ├── custom_streaming_asr/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── path.sh
│   │   ├── setup_docker.sh
│   │   ├── websocket_client.sh
│   │   └── websocket_server.sh
│   ├── keyword_spotting/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── metaverse/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── path.sh
│   │   ├── run.sh
│   │   └── sentences.txt
│   ├── punctuation_restoration/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── speaker_verification/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── speech_recognition/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── speech_server/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── asr_client.sh
│   │   ├── cls_client.sh
│   │   ├── conf/
│   │   │   ├── application.yaml
│   │   │   └── conformer_talcs_application.yaml
│   │   ├── server.sh
│   │   ├── sid_client.sh
│   │   ├── start_multi_progress_server.py
│   │   ├── text_client.sh
│   │   └── tts_client.sh
│   ├── speech_ssl/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── speech_translation/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── speech_web/
│   │   ├── .gitignore
│   │   ├── API.md
│   │   ├── README.md
│   │   ├── speech_server/
│   │   │   ├── conf/
│   │   │   │   ├── tts3_finetune.yaml
│   │   │   │   ├── tts_online_application.yaml
│   │   │   │   └── ws_conformer_wenetspeech_application_faster.yaml
│   │   │   ├── main.py
│   │   │   ├── requirements.txt
│   │   │   ├── src/
│   │   │   │   ├── AudioManeger.py
│   │   │   │   ├── SpeechBase/
│   │   │   │   │   ├── asr.py
│   │   │   │   │   ├── nlp.py
│   │   │   │   │   ├── sql_helper.py
│   │   │   │   │   ├── tts.py
│   │   │   │   │   ├── vpr.py
│   │   │   │   │   └── vpr_encode.py
│   │   │   │   ├── WebsocketManeger.py
│   │   │   │   ├── ernie_sat.py
│   │   │   │   ├── finetune.py
│   │   │   │   ├── ge2e_clone.py
│   │   │   │   ├── robot.py
│   │   │   │   ├── tdnn_clone.py
│   │   │   │   └── util.py
│   │   │   └── vc.py
│   │   └── web_client/
│   │       ├── .gitignore
│   │       ├── index.html
│   │       ├── package.json
│   │       ├── src/
│   │       │   ├── App.vue
│   │       │   ├── api/
│   │       │   │   ├── API.js
│   │       │   │   ├── ApiASR.js
│   │       │   │   ├── ApiNLP.js
│   │       │   │   ├── ApiTTS.js
│   │       │   │   ├── ApiVC.js
│   │       │   │   └── ApiVPR.js
│   │       │   ├── components/
│   │       │   │   ├── Content/
│   │       │   │   │   ├── Header/
│   │       │   │   │   │   ├── Header.vue
│   │       │   │   │   │   └── style.less
│   │       │   │   │   └── Tail/
│   │       │   │   │       ├── Tail.vue
│   │       │   │   │       └── style.less
│   │       │   │   ├── Experience.vue
│   │       │   │   ├── SubMenu/
│   │       │   │   │   ├── ASR/
│   │       │   │   │   │   ├── ASR.vue
│   │       │   │   │   │   ├── ASRT.vue
│   │       │   │   │   │   ├── AudioFile/
│   │       │   │   │   │   │   ├── AudioFileIdentification.vue
│   │       │   │   │   │   │   └── style.less
│   │       │   │   │   │   ├── EndToEnd/
│   │       │   │   │   │   │   ├── EndToEndIdentification.vue
│   │       │   │   │   │   │   └── style.less
│   │       │   │   │   │   ├── RealTime/
│   │       │   │   │   │   │   ├── RealTime.vue
│   │       │   │   │   │   │   └── style.less
│   │       │   │   │   │   └── style.less
│   │       │   │   │   ├── ChatBot/
│   │       │   │   │   │   ├── ChatT.vue
│   │       │   │   │   │   └── style.less
│   │       │   │   │   ├── ERNIE_SAT/
│   │       │   │   │   │   └── ERNIE_SAT.vue
│   │       │   │   │   ├── FineTune/
│   │       │   │   │   │   └── FineTune.vue
│   │       │   │   │   ├── IE/
│   │       │   │   │   │   ├── IET.vue
│   │       │   │   │   │   └── style.less
│   │       │   │   │   ├── TTS/
│   │       │   │   │   │   ├── TTST.vue
│   │       │   │   │   │   └── style.less
│   │       │   │   │   ├── VPR/
│   │       │   │   │   │   ├── VPRT.vue
│   │       │   │   │   │   └── style.less
│   │       │   │   │   └── VoiceClone/
│   │       │   │   │       └── VoiceClone.vue
│   │       │   │   └── style.less
│   │       │   └── main.js
│   │       └── vite.config.js
│   ├── story_talker/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── ocr.py
│   │   ├── path.sh
│   │   └── run.sh
│   ├── streaming_asr_server/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── conf/
│   │   │   ├── application.yaml
│   │   │   ├── punc_application.yaml
│   │   │   ├── ws_conformer_application.yaml
│   │   │   ├── ws_conformer_talcs_application.yaml
│   │   │   ├── ws_conformer_wenetspeech_application.yaml
│   │   │   ├── ws_conformer_wenetspeech_application_faster.yaml
│   │   │   └── ws_ds2_application.yaml
│   │   ├── local/
│   │   │   ├── punc_server.py
│   │   │   ├── rtf_from_log.py
│   │   │   ├── streaming_asr_server.py
│   │   │   ├── test.sh
│   │   │   ├── websocket_client.py
│   │   │   └── websocket_client_srt.py
│   │   ├── run.sh
│   │   ├── server.sh
│   │   ├── test.sh
│   │   └── web/
│   │       ├── index.html
│   │       └── readme.md
│   ├── streaming_tts_server/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── client.sh
│   │   ├── conf/
│   │   │   ├── tts_online_application.yaml
│   │   │   └── tts_online_ws_application.yaml
│   │   └── server.sh
│   ├── streaming_tts_serving_fastdeploy/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── streaming_tts_serving/
│   │       ├── 1/
│   │       │   └── model.py
│   │       ├── config.pbtxt
│   │       └── stream_client.py
│   ├── style_fs2/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── path.sh
│   │   ├── run.sh
│   │   ├── sentences.txt
│   │   └── style_syn.py
│   ├── text_to_speech/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   └── whisper/
│       ├── README.md
│       ├── README_cn.md
│       └── run.sh
├── docker/
│   ├── ubuntu16-gpu/
│   │   └── Dockerfile
│   ├── ubuntu18-cpu/
│   │   └── Dockerfile
│   └── ubuntu20-cpu/
│       └── Dockerfile
├── docs/
│   ├── Makefile
│   ├── requirements.txt
│   ├── source/
│   │   ├── _static/
│   │   │   └── custom.css
│   │   ├── api/
│   │   │   ├── modules.rst
│   │   │   ├── paddlespeech.audio.features.layers.rst
│   │   │   ├── paddlespeech.audio.features.rst
│   │   │   ├── paddlespeech.audio.io.rst
│   │   │   ├── paddlespeech.audio.rst
│   │   │   ├── paddlespeech.audio.streamdata.autodecode.rst
│   │   │   ├── paddlespeech.audio.streamdata.cache.rst
│   │   │   ├── paddlespeech.audio.streamdata.compat.rst
│   │   │   ├── paddlespeech.audio.streamdata.extradatasets.rst
│   │   │   ├── paddlespeech.audio.streamdata.filters.rst
│   │   │   ├── paddlespeech.audio.streamdata.gopen.rst
│   │   │   ├── paddlespeech.audio.streamdata.handlers.rst
│   │   │   ├── paddlespeech.audio.streamdata.mix.rst
│   │   │   ├── paddlespeech.audio.streamdata.paddle_utils.rst
│   │   │   ├── paddlespeech.audio.streamdata.pipeline.rst
│   │   │   ├── paddlespeech.audio.streamdata.rst
│   │   │   ├── paddlespeech.audio.streamdata.shardlists.rst
│   │   │   ├── paddlespeech.audio.streamdata.tariterators.rst
│   │   │   ├── paddlespeech.audio.streamdata.utils.rst
│   │   │   ├── paddlespeech.audio.streamdata.writer.rst
│   │   │   ├── paddlespeech.audio.text.rst
│   │   │   ├── paddlespeech.audio.text.text_featurizer.rst
│   │   │   ├── paddlespeech.audio.text.utility.rst
│   │   │   ├── paddlespeech.audio.transform.add_deltas.rst
│   │   │   ├── paddlespeech.audio.transform.channel_selector.rst
│   │   │   ├── paddlespeech.audio.transform.cmvn.rst
│   │   │   ├── paddlespeech.audio.transform.functional.rst
│   │   │   ├── paddlespeech.audio.transform.perturb.rst
│   │   │   ├── paddlespeech.audio.transform.rst
│   │   │   ├── paddlespeech.audio.transform.spec_augment.rst
│   │   │   ├── paddlespeech.audio.transform.spectrogram.rst
│   │   │   ├── paddlespeech.audio.transform.transform_interface.rst
│   │   │   ├── paddlespeech.audio.transform.transformation.rst
│   │   │   ├── paddlespeech.audio.transform.wpe.rst
│   │   │   ├── paddlespeech.audio.utils.check_kwargs.rst
│   │   │   ├── paddlespeech.audio.utils.download.rst
│   │   │   ├── paddlespeech.audio.utils.dynamic_import.rst
│   │   │   ├── paddlespeech.audio.utils.error.rst
│   │   │   ├── paddlespeech.audio.utils.log.rst
│   │   │   ├── paddlespeech.audio.utils.numeric.rst
│   │   │   ├── paddlespeech.audio.utils.rst
│   │   │   ├── paddlespeech.audio.utils.tensor_utils.rst
│   │   │   ├── paddlespeech.audio.utils.time.rst
│   │   │   ├── paddlespeech.cli.asr.infer.rst
│   │   │   ├── paddlespeech.cli.asr.rst
│   │   │   ├── paddlespeech.cli.base_commands.rst
│   │   │   ├── paddlespeech.cli.cls.infer.rst
│   │   │   ├── paddlespeech.cli.cls.rst
│   │   │   ├── paddlespeech.cli.download.rst
│   │   │   ├── paddlespeech.cli.entry.rst
│   │   │   ├── paddlespeech.cli.executor.rst
│   │   │   ├── paddlespeech.cli.kws.infer.rst
│   │   │   ├── paddlespeech.cli.kws.rst
│   │   │   ├── paddlespeech.cli.log.rst
│   │   │   ├── paddlespeech.cli.rst
│   │   │   ├── paddlespeech.cli.st.infer.rst
│   │   │   ├── paddlespeech.cli.st.rst
│   │   │   ├── paddlespeech.cli.text.infer.rst
│   │   │   ├── paddlespeech.cli.text.rst
│   │   │   ├── paddlespeech.cli.tts.infer.rst
│   │   │   ├── paddlespeech.cli.tts.rst
│   │   │   ├── paddlespeech.cli.utils.rst
│   │   │   ├── paddlespeech.cli.vector.infer.rst
│   │   │   ├── paddlespeech.cli.vector.rst
│   │   │   ├── paddlespeech.cls.exps.panns.deploy.rst
│   │   │   ├── paddlespeech.cls.exps.panns.rst
│   │   │   ├── paddlespeech.cls.exps.rst
│   │   │   ├── paddlespeech.cls.models.panns.classifier.rst
│   │   │   ├── paddlespeech.cls.models.panns.panns.rst
│   │   │   ├── paddlespeech.cls.models.panns.rst
│   │   │   ├── paddlespeech.cls.models.rst
│   │   │   ├── paddlespeech.cls.rst
│   │   │   ├── paddlespeech.kws.exps.mdtc.collate.rst
│   │   │   ├── paddlespeech.kws.exps.mdtc.compute_det.rst
│   │   │   ├── paddlespeech.kws.exps.mdtc.plot_det_curve.rst
│   │   │   ├── paddlespeech.kws.exps.mdtc.rst
│   │   │   ├── paddlespeech.kws.exps.mdtc.score.rst
│   │   │   ├── paddlespeech.kws.exps.mdtc.train.rst
│   │   │   ├── paddlespeech.kws.exps.rst
│   │   │   ├── paddlespeech.kws.models.loss.rst
│   │   │   ├── paddlespeech.kws.models.mdtc.rst
│   │   │   ├── paddlespeech.kws.models.rst
│   │   │   ├── paddlespeech.kws.rst
│   │   │   ├── paddlespeech.resource.model_alias.rst
│   │   │   ├── paddlespeech.resource.pretrained_models.rst
│   │   │   ├── paddlespeech.resource.resource.rst
│   │   │   ├── paddlespeech.resource.rst
│   │   │   ├── paddlespeech.rst
│   │   │   ├── paddlespeech.s2t.decoders.beam_search.batch_beam_search.rst
│   │   │   ├── paddlespeech.s2t.decoders.beam_search.beam_search.rst
│   │   │   ├── paddlespeech.s2t.decoders.beam_search.rst
│   │   │   ├── paddlespeech.s2t.decoders.ctcdecoder.decoders_deprecated.rst
│   │   │   ├── paddlespeech.s2t.decoders.ctcdecoder.rst
│   │   │   ├── paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper.rst
│   │   │   ├── paddlespeech.s2t.decoders.recog.rst
│   │   │   ├── paddlespeech.s2t.decoders.rst
│   │   │   ├── paddlespeech.s2t.decoders.scorers.ctc.rst
│   │   │   ├── paddlespeech.s2t.decoders.scorers.ctc_prefix_score.rst
│   │   │   ├── paddlespeech.s2t.decoders.scorers.length_bonus.rst
│   │   │   ├── paddlespeech.s2t.decoders.scorers.rst
│   │   │   ├── paddlespeech.s2t.decoders.scorers.scorer_interface.rst
│   │   │   ├── paddlespeech.s2t.decoders.utils.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.deploy.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.deploy.runtime.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.deploy.server.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.export.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.test.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.test_export.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.test_wav.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.train.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.model.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.rst
│   │   │   ├── paddlespeech.s2t.exps.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.bin.alignment.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.bin.export.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.bin.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.bin.test.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.bin.test_wav.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.bin.train.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.model.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_kaldi.bin.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_kaldi.bin.test.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_kaldi.bin.train.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_kaldi.model.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_kaldi.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_st.bin.export.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_st.bin.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_st.bin.test.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_st.bin.train.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_st.model.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_st.rst
│   │   │   ├── paddlespeech.s2t.frontend.audio.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.augmentation.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.base.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.impulse_response.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.noise_perturb.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.online_bayesian_normalization.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.resample.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.shift_perturb.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.spec_augment.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.speed_perturb.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.volume_perturb.rst
│   │   │   ├── paddlespeech.s2t.frontend.featurizer.audio_featurizer.rst
│   │   │   ├── paddlespeech.s2t.frontend.featurizer.rst
│   │   │   ├── paddlespeech.s2t.frontend.featurizer.speech_featurizer.rst
│   │   │   ├── paddlespeech.s2t.frontend.featurizer.text_featurizer.rst
│   │   │   ├── paddlespeech.s2t.frontend.normalizer.rst
│   │   │   ├── paddlespeech.s2t.frontend.rst
│   │   │   ├── paddlespeech.s2t.frontend.speech.rst
│   │   │   ├── paddlespeech.s2t.frontend.utility.rst
│   │   │   ├── paddlespeech.s2t.io.batchfy.rst
│   │   │   ├── paddlespeech.s2t.io.collator.rst
│   │   │   ├── paddlespeech.s2t.io.converter.rst
│   │   │   ├── paddlespeech.s2t.io.dataloader.rst
│   │   │   ├── paddlespeech.s2t.io.dataset.rst
│   │   │   ├── paddlespeech.s2t.io.reader.rst
│   │   │   ├── paddlespeech.s2t.io.rst
│   │   │   ├── paddlespeech.s2t.io.sampler.rst
│   │   │   ├── paddlespeech.s2t.io.utility.rst
│   │   │   ├── paddlespeech.s2t.models.asr_interface.rst
│   │   │   ├── paddlespeech.s2t.models.ds2.conv.rst
│   │   │   ├── paddlespeech.s2t.models.ds2.deepspeech2.rst
│   │   │   ├── paddlespeech.s2t.models.ds2.rst
│   │   │   ├── paddlespeech.s2t.models.lm.dataset.rst
│   │   │   ├── paddlespeech.s2t.models.lm.rst
│   │   │   ├── paddlespeech.s2t.models.lm.transformer.rst
│   │   │   ├── paddlespeech.s2t.models.lm_interface.rst
│   │   │   ├── paddlespeech.s2t.models.rst
│   │   │   ├── paddlespeech.s2t.models.st_interface.rst
│   │   │   ├── paddlespeech.s2t.models.u2.rst
│   │   │   ├── paddlespeech.s2t.models.u2.u2.rst
│   │   │   ├── paddlespeech.s2t.models.u2.updater.rst
│   │   │   ├── paddlespeech.s2t.models.u2_st.rst
│   │   │   ├── paddlespeech.s2t.models.u2_st.u2_st.rst
│   │   │   ├── paddlespeech.s2t.modules.activation.rst
│   │   │   ├── paddlespeech.s2t.modules.align.rst
│   │   │   ├── paddlespeech.s2t.modules.attention.rst
│   │   │   ├── paddlespeech.s2t.modules.cmvn.rst
│   │   │   ├── paddlespeech.s2t.modules.conformer_convolution.rst
│   │   │   ├── paddlespeech.s2t.modules.crf.rst
│   │   │   ├── paddlespeech.s2t.modules.ctc.rst
│   │   │   ├── paddlespeech.s2t.modules.decoder.rst
│   │   │   ├── paddlespeech.s2t.modules.decoder_layer.rst
│   │   │   ├── paddlespeech.s2t.modules.embedding.rst
│   │   │   ├── paddlespeech.s2t.modules.encoder.rst
│   │   │   ├── paddlespeech.s2t.modules.encoder_layer.rst
│   │   │   ├── paddlespeech.s2t.modules.initializer.rst
│   │   │   ├── paddlespeech.s2t.modules.loss.rst
│   │   │   ├── paddlespeech.s2t.modules.mask.rst
│   │   │   ├── paddlespeech.s2t.modules.positionwise_feed_forward.rst
│   │   │   ├── paddlespeech.s2t.modules.rst
│   │   │   ├── paddlespeech.s2t.modules.subsampling.rst
│   │   │   ├── paddlespeech.s2t.rst
│   │   │   ├── paddlespeech.s2t.training.cli.rst
│   │   │   ├── paddlespeech.s2t.training.extensions.evaluator.rst
│   │   │   ├── paddlespeech.s2t.training.extensions.extension.rst
│   │   │   ├── paddlespeech.s2t.training.extensions.plot.rst
│   │   │   ├── paddlespeech.s2t.training.extensions.rst
│   │   │   ├── paddlespeech.s2t.training.gradclip.rst
│   │   │   ├── paddlespeech.s2t.training.optimizer.rst
│   │   │   ├── paddlespeech.s2t.training.reporter.rst
│   │   │   ├── paddlespeech.s2t.training.rst
│   │   │   ├── paddlespeech.s2t.training.scheduler.rst
│   │   │   ├── paddlespeech.s2t.training.timer.rst
│   │   │   ├── paddlespeech.s2t.training.trainer.rst
│   │   │   ├── paddlespeech.s2t.training.triggers.compare_value_trigger.rst
│   │   │   ├── paddlespeech.s2t.training.triggers.interval_trigger.rst
│   │   │   ├── paddlespeech.s2t.training.triggers.limit_trigger.rst
│   │   │   ├── paddlespeech.s2t.training.triggers.rst
│   │   │   ├── paddlespeech.s2t.training.triggers.time_trigger.rst
│   │   │   ├── paddlespeech.s2t.training.triggers.utils.rst
│   │   │   ├── paddlespeech.s2t.training.updaters.rst
│   │   │   ├── paddlespeech.s2t.training.updaters.standard_updater.rst
│   │   │   ├── paddlespeech.s2t.training.updaters.updater.rst
│   │   │   ├── paddlespeech.s2t.utils.asr_utils.rst
│   │   │   ├── paddlespeech.s2t.utils.bleu_score.rst
│   │   │   ├── paddlespeech.s2t.utils.check_kwargs.rst
│   │   │   ├── paddlespeech.s2t.utils.checkpoint.rst
│   │   │   ├── paddlespeech.s2t.utils.cli_readers.rst
│   │   │   ├── paddlespeech.s2t.utils.cli_utils.rst
│   │   │   ├── paddlespeech.s2t.utils.cli_writers.rst
│   │   │   ├── paddlespeech.s2t.utils.ctc_utils.rst
│   │   │   ├── paddlespeech.s2t.utils.dynamic_import.rst
│   │   │   ├── paddlespeech.s2t.utils.dynamic_pip_install.rst
│   │   │   ├── paddlespeech.s2t.utils.error_rate.rst
│   │   │   ├── paddlespeech.s2t.utils.layer_tools.rst
│   │   │   ├── paddlespeech.s2t.utils.log.rst
│   │   │   ├── paddlespeech.s2t.utils.mp_tools.rst
│   │   │   ├── paddlespeech.s2t.utils.profiler.rst
│   │   │   ├── paddlespeech.s2t.utils.rst
│   │   │   ├── paddlespeech.s2t.utils.socket_server.rst
│   │   │   ├── paddlespeech.s2t.utils.spec_augment.rst
│   │   │   ├── paddlespeech.s2t.utils.tensor_utils.rst
│   │   │   ├── paddlespeech.s2t.utils.text_grid.rst
│   │   │   ├── paddlespeech.s2t.utils.utility.rst
│   │   │   ├── paddlespeech.server.base_commands.rst
│   │   │   ├── paddlespeech.server.bin.paddlespeech_client.rst
│   │   │   ├── paddlespeech.server.bin.paddlespeech_server.rst
│   │   │   ├── paddlespeech.server.bin.rst
│   │   │   ├── paddlespeech.server.engine.acs.python.rst
│   │   │   ├── paddlespeech.server.engine.acs.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.ctc_endpoint.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.ctc_search.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.onnx.asr_engine.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.onnx.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.paddleinference.asr_engine.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.paddleinference.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.python.asr_engine.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.python.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.rst
│   │   │   ├── paddlespeech.server.engine.asr.paddleinference.asr_engine.rst
│   │   │   ├── paddlespeech.server.engine.asr.paddleinference.rst
│   │   │   ├── paddlespeech.server.engine.asr.python.asr_engine.rst
│   │   │   ├── paddlespeech.server.engine.asr.python.rst
│   │   │   ├── paddlespeech.server.engine.asr.rst
│   │   │   ├── paddlespeech.server.engine.base_engine.rst
│   │   │   ├── paddlespeech.server.engine.cls.paddleinference.cls_engine.rst
│   │   │   ├── paddlespeech.server.engine.cls.paddleinference.rst
│   │   │   ├── paddlespeech.server.engine.cls.python.cls_engine.rst
│   │   │   ├── paddlespeech.server.engine.cls.python.rst
│   │   │   ├── paddlespeech.server.engine.cls.rst
│   │   │   ├── paddlespeech.server.engine.engine_factory.rst
│   │   │   ├── paddlespeech.server.engine.engine_pool.rst
│   │   │   ├── paddlespeech.server.engine.engine_warmup.rst
│   │   │   ├── paddlespeech.server.engine.rst
│   │   │   ├── paddlespeech.server.engine.text.python.rst
│   │   │   ├── paddlespeech.server.engine.text.python.text_engine.rst
│   │   │   ├── paddlespeech.server.engine.text.rst
│   │   │   ├── paddlespeech.server.engine.tts.online.onnx.rst
│   │   │   ├── paddlespeech.server.engine.tts.online.onnx.tts_engine.rst
│   │   │   ├── paddlespeech.server.engine.tts.online.python.rst
│   │   │   ├── paddlespeech.server.engine.tts.online.python.tts_engine.rst
│   │   │   ├── paddlespeech.server.engine.tts.online.rst
│   │   │   ├── paddlespeech.server.engine.tts.paddleinference.rst
│   │   │   ├── paddlespeech.server.engine.tts.paddleinference.tts_engine.rst
│   │   │   ├── paddlespeech.server.engine.tts.python.rst
│   │   │   ├── paddlespeech.server.engine.tts.python.tts_engine.rst
│   │   │   ├── paddlespeech.server.engine.tts.rst
│   │   │   ├── paddlespeech.server.engine.vector.python.rst
│   │   │   ├── paddlespeech.server.engine.vector.python.vector_engine.rst
│   │   │   ├── paddlespeech.server.engine.vector.rst
│   │   │   ├── paddlespeech.server.entry.rst
│   │   │   ├── paddlespeech.server.executor.rst
│   │   │   ├── paddlespeech.server.restful.acs_api.rst
│   │   │   ├── paddlespeech.server.restful.api.rst
│   │   │   ├── paddlespeech.server.restful.asr_api.rst
│   │   │   ├── paddlespeech.server.restful.cls_api.rst
│   │   │   ├── paddlespeech.server.restful.request.rst
│   │   │   ├── paddlespeech.server.restful.response.rst
│   │   │   ├── paddlespeech.server.restful.rst
│   │   │   ├── paddlespeech.server.restful.text_api.rst
│   │   │   ├── paddlespeech.server.restful.tts_api.rst
│   │   │   ├── paddlespeech.server.restful.vector_api.rst
│   │   │   ├── paddlespeech.server.rst
│   │   │   ├── paddlespeech.server.tests.asr.offline.http_client.rst
│   │   │   ├── paddlespeech.server.tests.asr.offline.rst
│   │   │   ├── paddlespeech.server.tests.asr.rst
│   │   │   ├── paddlespeech.server.tests.rst
│   │   │   ├── paddlespeech.server.util.rst
│   │   │   ├── paddlespeech.server.utils.audio_handler.rst
│   │   │   ├── paddlespeech.server.utils.audio_process.rst
│   │   │   ├── paddlespeech.server.utils.buffer.rst
│   │   │   ├── paddlespeech.server.utils.config.rst
│   │   │   ├── paddlespeech.server.utils.errors.rst
│   │   │   ├── paddlespeech.server.utils.exception.rst
│   │   │   ├── paddlespeech.server.utils.onnx_infer.rst
│   │   │   ├── paddlespeech.server.utils.paddle_predictor.rst
│   │   │   ├── paddlespeech.server.utils.rst
│   │   │   ├── paddlespeech.server.utils.util.rst
│   │   │   ├── paddlespeech.server.utils.vad.rst
│   │   │   ├── paddlespeech.server.ws.api.rst
│   │   │   ├── paddlespeech.server.ws.asr_api.rst
│   │   │   ├── paddlespeech.server.ws.rst
│   │   │   ├── paddlespeech.server.ws.tts_api.rst
│   │   │   ├── paddlespeech.t2s.audio.audio.rst
│   │   │   ├── paddlespeech.t2s.audio.codec.rst
│   │   │   ├── paddlespeech.t2s.audio.rst
│   │   │   ├── paddlespeech.t2s.audio.spec_normalizer.rst
│   │   │   ├── paddlespeech.t2s.datasets.am_batch_fn.rst
│   │   │   ├── paddlespeech.t2s.datasets.batch.rst
│   │   │   ├── paddlespeech.t2s.datasets.data_table.rst
│   │   │   ├── paddlespeech.t2s.datasets.dataset.rst
│   │   │   ├── paddlespeech.t2s.datasets.get_feats.rst
│   │   │   ├── paddlespeech.t2s.datasets.ljspeech.rst
│   │   │   ├── paddlespeech.t2s.datasets.preprocess_utils.rst
│   │   │   ├── paddlespeech.t2s.datasets.rst
│   │   │   ├── paddlespeech.t2s.datasets.sampler.rst
│   │   │   ├── paddlespeech.t2s.datasets.vocoder_batch_fn.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.align.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.synthesize_e2e.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.train.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.utils.rst
│   │   │   ├── paddlespeech.t2s.exps.fastspeech2.gen_gta_mel.rst
│   │   │   ├── paddlespeech.t2s.exps.fastspeech2.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.fastspeech2.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.fastspeech2.rst
│   │   │   ├── paddlespeech.t2s.exps.fastspeech2.train.rst
│   │   │   ├── paddlespeech.t2s.exps.fastspeech2.vc2_infer.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.hifigan.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.hifigan.train.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.multi_band_melgan.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.multi_band_melgan.train.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.synthesize_from_wav.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.train.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.style_melgan.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.style_melgan.train.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.inference.rst
│   │   │   ├── paddlespeech.t2s.exps.inference_streaming.rst
│   │   │   ├── paddlespeech.t2s.exps.ort_predict.rst
│   │   │   ├── paddlespeech.t2s.exps.ort_predict_e2e.rst
│   │   │   ├── paddlespeech.t2s.exps.ort_predict_streaming.rst
│   │   │   ├── paddlespeech.t2s.exps.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.gen_gta_mel.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.inference.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.synthesize_e2e.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.train.rst
│   │   │   ├── paddlespeech.t2s.exps.stream_play_tts.rst
│   │   │   ├── paddlespeech.t2s.exps.syn_utils.rst
│   │   │   ├── paddlespeech.t2s.exps.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.synthesize_e2e.rst
│   │   │   ├── paddlespeech.t2s.exps.synthesize_streaming.rst
│   │   │   ├── paddlespeech.t2s.exps.tacotron2.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.tacotron2.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.tacotron2.rst
│   │   │   ├── paddlespeech.t2s.exps.tacotron2.train.rst
│   │   │   ├── paddlespeech.t2s.exps.transformer_tts.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.transformer_tts.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.transformer_tts.rst
│   │   │   ├── paddlespeech.t2s.exps.transformer_tts.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.transformer_tts.synthesize_e2e.rst
│   │   │   ├── paddlespeech.t2s.exps.transformer_tts.train.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.synthesize_e2e.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.train.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.voice_cloning.rst
│   │   │   ├── paddlespeech.t2s.exps.voice_cloning.rst
│   │   │   ├── paddlespeech.t2s.exps.waveflow.config.rst
│   │   │   ├── paddlespeech.t2s.exps.waveflow.ljspeech.rst
│   │   │   ├── paddlespeech.t2s.exps.waveflow.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.waveflow.rst
│   │   │   ├── paddlespeech.t2s.exps.waveflow.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.waveflow.train.rst
│   │   │   ├── paddlespeech.t2s.exps.wavernn.rst
│   │   │   ├── paddlespeech.t2s.exps.wavernn.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.wavernn.train.rst
│   │   │   ├── paddlespeech.t2s.frontend.arpabet.rst
│   │   │   ├── paddlespeech.t2s.frontend.g2pw.dataset.rst
│   │   │   ├── paddlespeech.t2s.frontend.g2pw.onnx_api.rst
│   │   │   ├── paddlespeech.t2s.frontend.g2pw.rst
│   │   │   ├── paddlespeech.t2s.frontend.g2pw.utils.rst
│   │   │   ├── paddlespeech.t2s.frontend.generate_lexicon.rst
│   │   │   ├── paddlespeech.t2s.frontend.mix_frontend.rst
│   │   │   ├── paddlespeech.t2s.frontend.normalizer.abbrrviation.rst
│   │   │   ├── paddlespeech.t2s.frontend.normalizer.acronyms.rst
│   │   │   ├── paddlespeech.t2s.frontend.normalizer.normalizer.rst
│   │   │   ├── paddlespeech.t2s.frontend.normalizer.numbers.rst
│   │   │   ├── paddlespeech.t2s.frontend.normalizer.rst
│   │   │   ├── paddlespeech.t2s.frontend.normalizer.width.rst
│   │   │   ├── paddlespeech.t2s.frontend.phonectic.rst
│   │   │   ├── paddlespeech.t2s.frontend.punctuation.rst
│   │   │   ├── paddlespeech.t2s.frontend.rst
│   │   │   ├── paddlespeech.t2s.frontend.tone_sandhi.rst
│   │   │   ├── paddlespeech.t2s.frontend.vocab.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_frontend.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.char_convert.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.chronology.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.constants.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.num.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.phonecode.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.quantifier.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.text_normlization.rst
│   │   │   ├── paddlespeech.t2s.models.ernie_sat.ernie_sat.rst
│   │   │   ├── paddlespeech.t2s.models.ernie_sat.ernie_sat_updater.rst
│   │   │   ├── paddlespeech.t2s.models.ernie_sat.rst
│   │   │   ├── paddlespeech.t2s.models.fastspeech2.fastspeech2.rst
│   │   │   ├── paddlespeech.t2s.models.fastspeech2.fastspeech2_updater.rst
│   │   │   ├── paddlespeech.t2s.models.fastspeech2.rst
│   │   │   ├── paddlespeech.t2s.models.hifigan.hifigan.rst
│   │   │   ├── paddlespeech.t2s.models.hifigan.hifigan_updater.rst
│   │   │   ├── paddlespeech.t2s.models.hifigan.rst
│   │   │   ├── paddlespeech.t2s.models.melgan.melgan.rst
│   │   │   ├── paddlespeech.t2s.models.melgan.multi_band_melgan_updater.rst
│   │   │   ├── paddlespeech.t2s.models.melgan.rst
│   │   │   ├── paddlespeech.t2s.models.melgan.style_melgan.rst
│   │   │   ├── paddlespeech.t2s.models.melgan.style_melgan_updater.rst
│   │   │   ├── paddlespeech.t2s.models.parallel_wavegan.parallel_wavegan.rst
│   │   │   ├── paddlespeech.t2s.models.parallel_wavegan.parallel_wavegan_updater.rst
│   │   │   ├── paddlespeech.t2s.models.parallel_wavegan.rst
│   │   │   ├── paddlespeech.t2s.models.rst
│   │   │   ├── paddlespeech.t2s.models.speedyspeech.rst
│   │   │   ├── paddlespeech.t2s.models.speedyspeech.speedyspeech.rst
│   │   │   ├── paddlespeech.t2s.models.speedyspeech.speedyspeech_updater.rst
│   │   │   ├── paddlespeech.t2s.models.tacotron2.rst
│   │   │   ├── paddlespeech.t2s.models.tacotron2.tacotron2.rst
│   │   │   ├── paddlespeech.t2s.models.tacotron2.tacotron2_updater.rst
│   │   │   ├── paddlespeech.t2s.models.transformer_tts.rst
│   │   │   ├── paddlespeech.t2s.models.transformer_tts.transformer_tts.rst
│   │   │   ├── paddlespeech.t2s.models.transformer_tts.transformer_tts_updater.rst
│   │   │   ├── paddlespeech.t2s.models.vits.duration_predictor.rst
│   │   │   ├── paddlespeech.t2s.models.vits.flow.rst
│   │   │   ├── paddlespeech.t2s.models.vits.generator.rst
│   │   │   ├── paddlespeech.t2s.models.vits.monotonic_align.core.rst
│   │   │   ├── paddlespeech.t2s.models.vits.monotonic_align.rst
│   │   │   ├── paddlespeech.t2s.models.vits.monotonic_align.setup.rst
│   │   │   ├── paddlespeech.t2s.models.vits.posterior_encoder.rst
│   │   │   ├── paddlespeech.t2s.models.vits.residual_coupling.rst
│   │   │   ├── paddlespeech.t2s.models.vits.rst
│   │   │   ├── paddlespeech.t2s.models.vits.text_encoder.rst
│   │   │   ├── paddlespeech.t2s.models.vits.transform.rst
│   │   │   ├── paddlespeech.t2s.models.vits.vits.rst
│   │   │   ├── paddlespeech.t2s.models.vits.vits_updater.rst
│   │   │   ├── paddlespeech.t2s.models.vits.wavenet.residual_block.rst
│   │   │   ├── paddlespeech.t2s.models.vits.wavenet.rst
│   │   │   ├── paddlespeech.t2s.models.vits.wavenet.wavenet.rst
│   │   │   ├── paddlespeech.t2s.models.waveflow.rst
│   │   │   ├── paddlespeech.t2s.models.wavernn.rst
│   │   │   ├── paddlespeech.t2s.models.wavernn.wavernn.rst
│   │   │   ├── paddlespeech.t2s.models.wavernn.wavernn_updater.rst
│   │   │   ├── paddlespeech.t2s.modules.activation.rst
│   │   │   ├── paddlespeech.t2s.modules.causal_conv.rst
│   │   │   ├── paddlespeech.t2s.modules.conformer.convolution.rst
│   │   │   ├── paddlespeech.t2s.modules.conformer.encoder_layer.rst
│   │   │   ├── paddlespeech.t2s.modules.conformer.rst
│   │   │   ├── paddlespeech.t2s.modules.conv.rst
│   │   │   ├── paddlespeech.t2s.modules.geometry.rst
│   │   │   ├── paddlespeech.t2s.modules.layer_norm.rst
│   │   │   ├── paddlespeech.t2s.modules.losses.rst
│   │   │   ├── paddlespeech.t2s.modules.masked_fill.rst
│   │   │   ├── paddlespeech.t2s.modules.nets_utils.rst
│   │   │   ├── paddlespeech.t2s.modules.normalizer.rst
│   │   │   ├── paddlespeech.t2s.modules.positional_encoding.rst
│   │   │   ├── paddlespeech.t2s.modules.pqmf.rst
│   │   │   ├── paddlespeech.t2s.modules.predictor.duration_predictor.rst
│   │   │   ├── paddlespeech.t2s.modules.predictor.length_regulator.rst
│   │   │   ├── paddlespeech.t2s.modules.predictor.rst
│   │   │   ├── paddlespeech.t2s.modules.predictor.variance_predictor.rst
│   │   │   ├── paddlespeech.t2s.modules.residual_block.rst
│   │   │   ├── paddlespeech.t2s.modules.residual_stack.rst
│   │   │   ├── paddlespeech.t2s.modules.rst
│   │   │   ├── paddlespeech.t2s.modules.style_encoder.rst
│   │   │   ├── paddlespeech.t2s.modules.tacotron2.attentions.rst
│   │   │   ├── paddlespeech.t2s.modules.tacotron2.decoder.rst
│   │   │   ├── paddlespeech.t2s.modules.tacotron2.encoder.rst
│   │   │   ├── paddlespeech.t2s.modules.tacotron2.rst
│   │   │   ├── paddlespeech.t2s.modules.tade_res_block.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.attention.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.decoder.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.decoder_layer.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.embedding.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.encoder.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.encoder_layer.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.lightconv.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.mask.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.multi_layer_conv.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.positionwise_feed_forward.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.repeat.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.subsampling.rst
│   │   │   ├── paddlespeech.t2s.modules.upsample.rst
│   │   │   ├── paddlespeech.t2s.rst
│   │   │   ├── paddlespeech.t2s.training.cli.rst
│   │   │   ├── paddlespeech.t2s.training.default_config.rst
│   │   │   ├── paddlespeech.t2s.training.experiment.rst
│   │   │   ├── paddlespeech.t2s.training.extension.rst
│   │   │   ├── paddlespeech.t2s.training.extensions.evaluator.rst
│   │   │   ├── paddlespeech.t2s.training.extensions.rst
│   │   │   ├── paddlespeech.t2s.training.extensions.snapshot.rst
│   │   │   ├── paddlespeech.t2s.training.extensions.visualizer.rst
│   │   │   ├── paddlespeech.t2s.training.optimizer.rst
│   │   │   ├── paddlespeech.t2s.training.reporter.rst
│   │   │   ├── paddlespeech.t2s.training.rst
│   │   │   ├── paddlespeech.t2s.training.seeding.rst
│   │   │   ├── paddlespeech.t2s.training.trainer.rst
│   │   │   ├── paddlespeech.t2s.training.trigger.rst
│   │   │   ├── paddlespeech.t2s.training.triggers.interval_trigger.rst
│   │   │   ├── paddlespeech.t2s.training.triggers.limit_trigger.rst
│   │   │   ├── paddlespeech.t2s.training.triggers.rst
│   │   │   ├── paddlespeech.t2s.training.triggers.time_trigger.rst
│   │   │   ├── paddlespeech.t2s.training.updater.rst
│   │   │   ├── paddlespeech.t2s.training.updaters.rst
│   │   │   ├── paddlespeech.t2s.training.updaters.standard_updater.rst
│   │   │   ├── paddlespeech.t2s.utils.checkpoint.rst
│   │   │   ├── paddlespeech.t2s.utils.display.rst
│   │   │   ├── paddlespeech.t2s.utils.error_rate.rst
│   │   │   ├── paddlespeech.t2s.utils.h5_utils.rst
│   │   │   ├── paddlespeech.t2s.utils.internals.rst
│   │   │   ├── paddlespeech.t2s.utils.layer_tools.rst
│   │   │   ├── paddlespeech.t2s.utils.mp_tools.rst
│   │   │   ├── paddlespeech.t2s.utils.profiler.rst
│   │   │   ├── paddlespeech.t2s.utils.rst
│   │   │   ├── paddlespeech.t2s.utils.scheduler.rst
│   │   │   ├── paddlespeech.text.exps.ernie_linear.avg_model.rst
│   │   │   ├── paddlespeech.text.exps.ernie_linear.punc_restore.rst
│   │   │   ├── paddlespeech.text.exps.ernie_linear.rst
│   │   │   ├── paddlespeech.text.exps.ernie_linear.test.rst
│   │   │   ├── paddlespeech.text.exps.ernie_linear.train.rst
│   │   │   ├── paddlespeech.text.exps.rst
│   │   │   ├── paddlespeech.text.models.ernie_crf.model.rst
│   │   │   ├── paddlespeech.text.models.ernie_crf.rst
│   │   │   ├── paddlespeech.text.models.ernie_linear.dataset.rst
│   │   │   ├── paddlespeech.text.models.ernie_linear.ernie_linear.rst
│   │   │   ├── paddlespeech.text.models.ernie_linear.ernie_linear_updater.rst
│   │   │   ├── paddlespeech.text.models.ernie_linear.rst
│   │   │   ├── paddlespeech.text.models.rst
│   │   │   ├── paddlespeech.text.rst
│   │   │   ├── paddlespeech.utils.dynamic_import.rst
│   │   │   ├── paddlespeech.utils.env.rst
│   │   │   ├── paddlespeech.utils.rst
│   │   │   ├── paddlespeech.vector.cluster.diarization.rst
│   │   │   ├── paddlespeech.vector.cluster.plda.rst
│   │   │   ├── paddlespeech.vector.cluster.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.audio_processor.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.config.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.dataset_processors.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.inference.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.preprocess.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.random_cycle.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.speaker_verification_dataset.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.train.rst
│   │   │   ├── paddlespeech.vector.exps.rst
│   │   │   ├── paddlespeech.vector.io.augment.rst
│   │   │   ├── paddlespeech.vector.io.batch.rst
│   │   │   ├── paddlespeech.vector.io.dataset.rst
│   │   │   ├── paddlespeech.vector.io.dataset_from_json.rst
│   │   │   ├── paddlespeech.vector.io.embedding_norm.rst
│   │   │   ├── paddlespeech.vector.io.rst
│   │   │   ├── paddlespeech.vector.io.signal_processing.rst
│   │   │   ├── paddlespeech.vector.models.ecapa_tdnn.rst
│   │   │   ├── paddlespeech.vector.models.lstm_speaker_encoder.rst
│   │   │   ├── paddlespeech.vector.models.rst
│   │   │   ├── paddlespeech.vector.modules.layer.rst
│   │   │   ├── paddlespeech.vector.modules.loss.rst
│   │   │   ├── paddlespeech.vector.modules.rst
│   │   │   ├── paddlespeech.vector.modules.sid_model.rst
│   │   │   ├── paddlespeech.vector.rst
│   │   │   ├── paddlespeech.vector.training.rst
│   │   │   ├── paddlespeech.vector.training.scheduler.rst
│   │   │   ├── paddlespeech.vector.training.seeding.rst
│   │   │   ├── paddlespeech.vector.utils.rst
│   │   │   ├── paddlespeech.vector.utils.time.rst
│   │   │   ├── paddlespeech.vector.utils.vector_utils.rst
│   │   │   └── paddlespeech.version.rst
│   │   ├── asr/
│   │   │   ├── PPASR.md
│   │   │   ├── PPASR_cn.md
│   │   │   ├── data_preparation.md
│   │   │   ├── feature_list.md
│   │   │   ├── models_introduction.md
│   │   │   ├── ngram_lm.md
│   │   │   └── quick_start.md
│   │   ├── audio/
│   │   │   ├── _static/
│   │   │   │   └── custom.css
│   │   │   ├── _templates/
│   │   │   │   ├── module.rst_t
│   │   │   │   ├── package.rst_t
│   │   │   │   └── toc.rst_t
│   │   │   ├── conf.py
│   │   │   └── index.rst
│   │   ├── audio_api/
│   │   │   ├── modules.rst
│   │   │   ├── paddleaudio.backends.common.rst
│   │   │   ├── paddleaudio.backends.no_backend.rst
│   │   │   ├── paddleaudio.backends.rst
│   │   │   ├── paddleaudio.backends.soundfile_backend.rst
│   │   │   ├── paddleaudio.backends.sox_io_backend.rst
│   │   │   ├── paddleaudio.backends.utils.rst
│   │   │   ├── paddleaudio.compliance.kaldi.rst
│   │   │   ├── paddleaudio.compliance.librosa.rst
│   │   │   ├── paddleaudio.compliance.rst
│   │   │   ├── paddleaudio.datasets.dataset.rst
│   │   │   ├── paddleaudio.datasets.esc50.rst
│   │   │   ├── paddleaudio.datasets.gtzan.rst
│   │   │   ├── paddleaudio.datasets.hey_snips.rst
│   │   │   ├── paddleaudio.datasets.rirs_noises.rst
│   │   │   ├── paddleaudio.datasets.rst
│   │   │   ├── paddleaudio.datasets.tess.rst
│   │   │   ├── paddleaudio.datasets.urban_sound.rst
│   │   │   ├── paddleaudio.datasets.voxceleb.rst
│   │   │   ├── paddleaudio.features.layers.rst
│   │   │   ├── paddleaudio.features.rst
│   │   │   ├── paddleaudio.functional.functional.rst
│   │   │   ├── paddleaudio.functional.rst
│   │   │   ├── paddleaudio.functional.window.rst
│   │   │   ├── paddleaudio.kaldi.kaldi.rst
│   │   │   ├── paddleaudio.kaldi.rst
│   │   │   ├── paddleaudio.metric.eer.rst
│   │   │   ├── paddleaudio.metric.rst
│   │   │   ├── paddleaudio.rst
│   │   │   ├── paddleaudio.sox_effects.rst
│   │   │   ├── paddleaudio.sox_effects.sox_effects.rst
│   │   │   ├── paddleaudio.utils.download.rst
│   │   │   ├── paddleaudio.utils.env.rst
│   │   │   ├── paddleaudio.utils.error.rst
│   │   │   ├── paddleaudio.utils.log.rst
│   │   │   ├── paddleaudio.utils.numeric.rst
│   │   │   ├── paddleaudio.utils.rst
│   │   │   ├── paddleaudio.utils.sox_utils.rst
│   │   │   ├── paddleaudio.utils.tensor_utils.rst
│   │   │   └── paddleaudio.utils.time.rst
│   │   ├── cls/
│   │   │   ├── custom_dataset.md
│   │   │   └── quick_start.md
│   │   ├── conf.py
│   │   ├── demo_video.rst
│   │   ├── dependencies.md
│   │   ├── index.rst
│   │   ├── install.md
│   │   ├── install_cn.md
│   │   ├── introduction.md
│   │   ├── reference.md
│   │   ├── released_model.md
│   │   ├── streaming_asr_demo_video.rst
│   │   ├── streaming_tts_demo_video.rst
│   │   ├── tts/
│   │   │   ├── PPTTS.md
│   │   │   ├── PPTTS_cn.md
│   │   │   ├── README.md
│   │   │   ├── advanced_usage.md
│   │   │   ├── demo.rst
│   │   │   ├── demo_2.rst
│   │   │   ├── gan_vocoder.md
│   │   │   ├── models_introduction.md
│   │   │   ├── quick_start.md
│   │   │   ├── quick_start_cn.md
│   │   │   ├── svs_music_score.md
│   │   │   ├── test_sentence.txt
│   │   │   ├── tts_datasets.md
│   │   │   ├── tts_papers.md
│   │   │   └── zh_text_frontend.md
│   │   ├── tts_demo_video.rst
│   │   └── vpr/
│   │       ├── PPVPR.md
│   │       └── PPVPR_cn.md
│   ├── topic/
│   │   ├── ctc/
│   │   │   ├── ctc_loss.ipynb
│   │   │   ├── ctc_loss_compare.ipynb
│   │   │   └── ctc_loss_speed_compare.ipynb
│   │   ├── frontend/
│   │   │   └── g2p.md
│   │   ├── gan_vocoder/
│   │   │   └── gan_vocoder.ipynb
│   │   └── package_release/
│   │       └── python_package_release.md
│   └── tutorial/
│       ├── .gitkeep
│       ├── asr/
│       │   ├── tutorial_deepspeech2.ipynb
│       │   └── tutorial_transformer.ipynb
│       ├── cls/
│       │   └── cls_tutorial.ipynb
│       ├── st/
│       │   └── st_tutorial.ipynb
│       └── tts/
│           └── tts_tutorial.ipynb
├── examples/
│   ├── aishell/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── asr0/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── conf/
│   │   │   │   ├── deepspeech2.yaml
│   │   │   │   ├── deepspeech2_online.yaml
│   │   │   │   ├── preprocess.yaml
│   │   │   │   └── tuning/
│   │   │   │       ├── chunk_decode.yaml
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── download_lm_ch.sh
│   │   │   │   ├── export.sh
│   │   │   │   ├── test.sh
│   │   │   │   ├── test_export.sh
│   │   │   │   ├── test_wav.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── asr1/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── conf/
│   │   │   │   ├── augmentation.json
│   │   │   │   ├── chunk_conformer.yaml
│   │   │   │   ├── chunk_roformer.yaml
│   │   │   │   ├── chunk_roformer_bidecoder.yaml
│   │   │   │   ├── chunk_squeezeformer.yaml
│   │   │   │   ├── conformer.yaml
│   │   │   │   ├── preprocess.yaml
│   │   │   │   ├── squeezeformer.yaml
│   │   │   │   ├── transformer.yaml
│   │   │   │   └── tuning/
│   │   │   │       ├── chunk_decode.yaml
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── aishell_train_lms.sh
│   │   │   │   ├── align.sh
│   │   │   │   ├── data.sh
│   │   │   │   ├── export.sh
│   │   │   │   ├── test.sh
│   │   │   │   ├── test_wav.sh
│   │   │   │   ├── tlg.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   └── asr3/
│   │       ├── README.md
│   │       ├── RESULT.md
│   │       ├── cmd.sh
│   │       ├── conf/
│   │       │   ├── preprocess.yaml
│   │       │   ├── train_with_wav2vec.yaml
│   │       │   ├── tuning/
│   │       │   │   └── decode.yaml
│   │       │   ├── wav2vec2ASR.yaml
│   │       │   └── wav2vec2ASR_adadelta.yaml
│   │       ├── local/
│   │       │   ├── aishell_prepare.py
│   │       │   ├── data.sh
│   │       │   ├── test.sh
│   │       │   ├── test_wav.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── aishell3/
│   │   ├── README.md
│   │   ├── ernie_sat/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── tts3/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   ├── conformer.yaml
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── lite_predict.sh
│   │   │   │   ├── ort_predict.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   └── run.sh
│   │   ├── vc0/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── train.sh
│   │   │   │   └── voice_cloning.sh
│   │   │   └── run.sh
│   │   ├── vc1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── voice_cloning.sh
│   │   │   └── run.sh
│   │   ├── vc2/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   └── voice_cloning.sh
│   │   │   └── run.sh
│   │   ├── vits/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── vits-vc/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── train.sh
│   │   │   │   └── voice_cloning.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   └── preprocess.sh
│   │   │   └── run.sh
│   │   └── voc5/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── default.yaml
│   │       └── run.sh
│   ├── aishell3_vctk/
│   │   ├── README.md
│   │   └── ernie_sat/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── default.yaml
│   │       ├── local/
│   │       │   ├── preprocess.sh
│   │       │   └── synthesize_e2e.sh
│   │       └── run.sh
│   ├── ami/
│   │   ├── README.md
│   │   └── sd0/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── ecapa_tdnn.yaml
│   │       ├── local/
│   │       │   ├── ami_prepare.py
│   │       │   ├── ami_splits.py
│   │       │   ├── compute_embdding.py
│   │       │   ├── dataio.py
│   │       │   ├── experiment.py
│   │       │   └── process.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── callcenter/
│   │   ├── README.md
│   │   └── asr1/
│   │       ├── .gitignore
│   │       ├── RESULTS.md
│   │       ├── conf/
│   │       │   ├── augmentation.json
│   │       │   ├── chunk_conformer.yaml
│   │       │   ├── conformer.yaml
│   │       │   ├── preprocess.yaml
│   │       │   └── tuning/
│   │       │       ├── chunk_decode.yaml
│   │       │       └── decode.yaml
│   │       ├── local/
│   │       │   ├── align.sh
│   │       │   ├── data.sh
│   │       │   ├── download_lm_ch.sh
│   │       │   ├── export.sh
│   │       │   ├── test.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── canton/
│   │   └── tts3/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── default.yaml
│   │       ├── local/
│   │       │   ├── inference.sh
│   │       │   ├── ort_predict.sh
│   │       │   ├── preprocess.sh
│   │       │   └── synthesize_e2e.sh
│   │       └── run.sh
│   ├── csmsc/
│   │   ├── README.md
│   │   ├── jets/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── tts0/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── tts2/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── inference_mlu.sh
│   │   │   │   ├── inference_npu.sh
│   │   │   │   ├── inference_xpu.sh
│   │   │   │   ├── lite_predict.sh
│   │   │   │   ├── ort_predict.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   ├── synthesize_e2e_mlu.sh
│   │   │   │   ├── synthesize_e2e_npu.sh
│   │   │   │   ├── synthesize_e2e_xpu.sh
│   │   │   │   ├── synthesize_mlu.sh
│   │   │   │   ├── synthesize_npu.sh
│   │   │   │   ├── synthesize_xpu.sh
│   │   │   │   ├── train.sh
│   │   │   │   ├── train_mlu.sh
│   │   │   │   ├── train_npu.sh
│   │   │   │   └── train_xpu.sh
│   │   │   ├── path.sh
│   │   │   ├── run.sh
│   │   │   ├── run_mlu.sh
│   │   │   ├── run_npu.sh
│   │   │   └── run_xpu.sh
│   │   ├── tts3/
│   │   │   ├── README.md
│   │   │   ├── README_cn.md
│   │   │   ├── conf/
│   │   │   │   ├── cnndecoder.yaml
│   │   │   │   ├── conformer.yaml
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── PTQ_dynamic.sh
│   │   │   │   ├── PTQ_static.sh
│   │   │   │   ├── export2lite.sh
│   │   │   │   ├── inference.sh
│   │   │   │   ├── inference_streaming.sh
│   │   │   │   ├── inference_xpu.sh
│   │   │   │   ├── lite_predict.sh
│   │   │   │   ├── lite_predict_streaming.sh
│   │   │   │   ├── ort_predict.sh
│   │   │   │   ├── ort_predict_streaming.sh
│   │   │   │   ├── paddle2onnx.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── simple.lexicon
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   ├── synthesize_e2e_xpu.sh
│   │   │   │   ├── synthesize_streaming.sh
│   │   │   │   ├── synthesize_xpu.sh
│   │   │   │   ├── train.sh
│   │   │   │   └── train_xpu.sh
│   │   │   ├── path.sh
│   │   │   ├── run.sh
│   │   │   ├── run_cnndecoder.sh
│   │   │   └── run_xpu.sh
│   │   ├── tts3_rhy/
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   └── synthesize_e2e.sh
│   │   │   └── run.sh
│   │   ├── vits/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── lite_predict.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── PTQ_static.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc3/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   ├── default.yaml
│   │   │   │   └── finetune.yaml
│   │   │   ├── local/
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── synthesize_e2e.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc4/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   └── synthesize.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc5/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   ├── default.yaml
│   │   │   │   ├── finetune.yaml
│   │   │   │   └── iSTFT.yaml
│   │   │   ├── finetune.sh
│   │   │   ├── iSTFTNet.md
│   │   │   ├── local/
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── synthesize_e2e.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   └── voc6/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── default.yaml
│   │       ├── local/
│   │       │   ├── preprocess.sh
│   │       │   └── synthesize.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── esc50/
│   │   ├── README.md
│   │   ├── RESULTS.md
│   │   └── cls0/
│   │       ├── conf/
│   │       │   └── panns.yaml
│   │       ├── local/
│   │       │   ├── export.sh
│   │       │   ├── infer.sh
│   │       │   ├── static_model_infer.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── hey_snips/
│   │   ├── README.md
│   │   └── kws0/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── mdtc.yaml
│   │       ├── local/
│   │       │   ├── plot.sh
│   │       │   ├── score.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── iwslt2012/
│   │   └── punc0/
│   │       ├── README.md
│   │       ├── RESULTS.md
│   │       ├── conf/
│   │       │   ├── default.yaml
│   │       │   ├── ernie-3.0-base.yaml
│   │       │   ├── ernie-3.0-medium.yaml
│   │       │   ├── ernie-3.0-mini.yaml
│   │       │   ├── ernie-3.0-nano-zh.yaml
│   │       │   └── ernie-tiny.yaml
│   │       ├── local/
│   │       │   ├── data.sh
│   │       │   ├── preprocess.py
│   │       │   ├── punc_restore.sh
│   │       │   ├── test.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── librispeech/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── asr0/
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── conf/
│   │   │   │   ├── deepspeech2.yaml
│   │   │   │   ├── deepspeech2_online.yaml
│   │   │   │   ├── preprocess.yaml
│   │   │   │   └── tuning/
│   │   │   │       ├── chunk_decode.yaml
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── download_lm_en.sh
│   │   │   │   ├── export.sh
│   │   │   │   ├── test.sh
│   │   │   │   ├── test_wav.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── asr1/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── cmd.sh
│   │   │   ├── conf/
│   │   │   │   ├── augmentation.json
│   │   │   │   ├── chunk_conformer.yaml
│   │   │   │   ├── chunk_transformer.yaml
│   │   │   │   ├── conformer.yaml
│   │   │   │   ├── preprocess.yaml
│   │   │   │   ├── transformer.yaml
│   │   │   │   └── tuning/
│   │   │   │       ├── chunk_decode.yaml
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── align.sh
│   │   │   │   ├── data.sh
│   │   │   │   ├── download_lm_en.sh
│   │   │   │   ├── export.sh
│   │   │   │   ├── test.sh
│   │   │   │   ├── test_wav.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── asr2/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── cmd.sh
│   │   │   ├── conf/
│   │   │   │   ├── augmentation.json
│   │   │   │   ├── decode/
│   │   │   │   │   ├── decode.yaml
│   │   │   │   │   ├── decode_att.yaml
│   │   │   │   │   ├── decode_base.yaml
│   │   │   │   │   ├── decode_ctc.yaml
│   │   │   │   │   └── decode_wo_lm.yaml
│   │   │   │   ├── fbank.conf
│   │   │   │   ├── lm/
│   │   │   │   │   └── transformer.yaml
│   │   │   │   ├── pitch.conf
│   │   │   │   ├── preprocess.yaml
│   │   │   │   └── transformer.yaml
│   │   │   ├── local/
│   │   │   │   ├── align.sh
│   │   │   │   ├── cacu_perplexity.sh
│   │   │   │   ├── data.sh
│   │   │   │   ├── data_prep.sh
│   │   │   │   ├── download_lm_en.sh
│   │   │   │   ├── espnet_json_to_manifest.py
│   │   │   │   ├── export.sh
│   │   │   │   ├── recog.sh
│   │   │   │   ├── test.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── asr3/
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── cmd.sh
│   │   │   ├── conf/
│   │   │   │   ├── preprocess.yaml
│   │   │   │   ├── tuning/
│   │   │   │   │   └── decode.yaml
│   │   │   │   └── wav2vec2ASR.yaml
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── test.sh
│   │   │   │   ├── test_wav.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── asr4/
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── cmd.sh
│   │   │   ├── conf/
│   │   │   │   ├── config.json
│   │   │   │   ├── hubertASR.yaml
│   │   │   │   ├── preprocess.yaml
│   │   │   │   ├── preprocessor_config.json
│   │   │   │   └── tuning/
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── test.sh
│   │   │   │   ├── test_wav.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   └── asr5/
│   │       ├── README.md
│   │       ├── RESULTS.md
│   │       ├── avg.sh
│   │       ├── cmd.sh
│   │       ├── compute_wer.py
│   │       ├── conf/
│   │       │   ├── preprocess.yaml
│   │       │   ├── preprocessor_config.json
│   │       │   ├── tuning/
│   │       │   │   └── decode.yaml
│   │       │   └── wavlmASR.yaml
│   │       ├── local/
│   │       │   ├── data.sh
│   │       │   ├── test.sh
│   │       │   ├── test_wav.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── ljspeech/
│   │   ├── README.md
│   │   ├── tts0/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── synthesize_e2e.sh
│   │   │   └── run.sh
│   │   ├── tts1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── tts3/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── lite_predict.sh
│   │   │   │   ├── ort_predict.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── synthesize_e2e.sh
│   │   │   └── run.sh
│   │   ├── voc0/
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   └── preprocess.sh
│   │   │   └── run.sh
│   │   └── voc5/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── default.yaml
│   │       └── run.sh
│   ├── mustc/
│   │   └── st1/
│   │       ├── cmd.sh
│   │       ├── conf/
│   │       │   ├── fbank.conf
│   │       │   ├── pitch.conf
│   │       │   ├── transformer_de.yaml
│   │       │   ├── transformer_es.yaml
│   │       │   ├── transformer_fr.yaml
│   │       │   ├── transformer_it.yaml
│   │       │   ├── transformer_nl.yaml
│   │       │   ├── transformer_pt.yaml
│   │       │   ├── transformer_ro.yaml
│   │       │   └── transformer_ru.yaml
│   │       ├── local/
│   │       │   ├── augmentation.json
│   │       │   ├── data.sh
│   │       │   ├── data_prep.sh
│   │       │   ├── divide_lang.sh
│   │       │   ├── remove_punctuation.pl
│   │       │   ├── test.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── opencpop/
│   │   ├── README.md
│   │   ├── svs1/
│   │   │   ├── README.md
│   │   │   ├── README_cn.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── pinyin_to_phone.txt
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── dygraph_to_static.sh
│   │   │   │   └── preprocess.sh
│   │   │   └── run.sh
│   │   └── voc5/
│   │       ├── conf/
│   │       │   ├── default.yaml
│   │       │   └── finetune.yaml
│   │       ├── finetune.sh
│   │       ├── local/
│   │       │   └── dygraph_to_static.sh
│   │       └── run.sh
│   ├── other/
│   │   ├── augmentation/
│   │   │   └── augmentation.json
│   │   ├── cc-cedict/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   └── parser.py
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── g2p/
│   │   │   ├── README.md
│   │   │   ├── compare_badcase.py
│   │   │   ├── get_g2p_data.py
│   │   │   ├── path.sh
│   │   │   ├── run.sh
│   │   │   └── test_g2p.py
│   │   ├── ge2e/
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── mfa/
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   ├── detect_oov.py
│   │   │   │   ├── generate_canton_lexicon_wavlabs.py
│   │   │   │   ├── generate_lexicon.py
│   │   │   │   ├── reorganize_aishell3.py
│   │   │   │   ├── reorganize_baker.py
│   │   │   │   ├── reorganize_ljspeech.py
│   │   │   │   └── reorganize_vctk.py
│   │   │   ├── run.sh
│   │   │   └── run_canton.sh
│   │   ├── ngram_lm/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   └── s0/
│   │   │       ├── .gitignore
│   │   │       ├── README.md
│   │   │       ├── data/
│   │   │       │   ├── README.md
│   │   │       │   ├── custom_confusion.txt
│   │   │       │   └── text_correct.txt
│   │   │       ├── local/
│   │   │       │   ├── build_zh_lm.sh
│   │   │       │   ├── download_lm_zh.sh
│   │   │       │   └── kenlm_score_test.py
│   │   │       ├── path.sh
│   │   │       ├── requirements.txt
│   │   │       └── run.sh
│   │   ├── punctuation_restoration/
│   │   │   └── README.md
│   │   ├── rhy/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── data/
│   │   │   │   └── rhy_token
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── pre_for_sp_aishell.py
│   │   │   │   ├── pre_for_sp_csmsc.py
│   │   │   │   ├── rhy_predict.sh
│   │   │   │   ├── test.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── spm/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── path.sh
│   │   │   ├── run.sh
│   │   │   └── text
│   │   ├── tn/
│   │   │   ├── README.md
│   │   │   ├── data/
│   │   │   │   └── textnorm_test_cases.txt
│   │   │   ├── get_textnorm_data.py
│   │   │   ├── path.sh
│   │   │   ├── run.sh
│   │   │   └── test_textnorm.py
│   │   └── tts_finetune/
│   │       └── tts3/
│   │           ├── README.md
│   │           ├── conf/
│   │           │   ├── fastspeech2_layers.txt
│   │           │   └── finetune.yaml
│   │           ├── local/
│   │           │   ├── check_oov.py
│   │           │   ├── extract_feature.py
│   │           │   ├── finetune.py
│   │           │   ├── generate_duration.py
│   │           │   ├── get_mfa_result.py
│   │           │   └── prepare_env.py
│   │           ├── path.sh
│   │           ├── run.sh
│   │           ├── run_en.sh
│   │           └── run_mix.sh
│   ├── tal_cs/
│   │   └── asr1/
│   │       ├── README.md
│   │       ├── RESULTS.md
│   │       ├── conf/
│   │       │   ├── chunk_conformer.yaml
│   │       │   ├── conformer.yaml
│   │       │   ├── preprocess.yaml
│   │       │   └── tuning/
│   │       │       ├── chunk_decode.yaml
│   │       │       └── decode.yaml
│   │       ├── local/
│   │       │   ├── data.sh
│   │       │   ├── test.sh
│   │       │   ├── test_wav.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── ted_en_zh/
│   │   ├── README.md
│   │   ├── st0/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── conf/
│   │   │   │   ├── preprocess.yaml
│   │   │   │   ├── transformer.yaml
│   │   │   │   ├── transformer_mtl_noam.yaml
│   │   │   │   └── tuning/
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── test.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   └── st1/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── RESULTS.md
│   │       ├── cmd.sh
│   │       ├── conf/
│   │       │   ├── fbank.conf
│   │       │   ├── pitch.conf
│   │       │   ├── preprocess.yaml
│   │       │   ├── transformer.yaml
│   │       │   ├── transformer_mtl_noam.yaml
│   │       │   └── tuning/
│   │       │       └── decode.yaml
│   │       ├── local/
│   │       │   ├── convert_torch_to_paddle.py
│   │       │   ├── data.sh
│   │       │   ├── divide_lang.sh
│   │       │   ├── download_pretrain.sh
│   │       │   ├── remove_punctuation.pl
│   │       │   ├── ted_en_zh.py
│   │       │   ├── test.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── tess/
│   │   ├── README.md
│   │   └── cls0/
│   │       ├── conf/
│   │       │   ├── panns_logmelspectrogram.yaml
│   │       │   ├── panns_melspectrogram.yaml
│   │       │   ├── panns_mfcc.yaml
│   │       │   └── panns_spectrogram.yaml
│   │       ├── local/
│   │       │   ├── train.py
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── thchs30/
│   │   ├── README.md
│   │   └── align0/
│   │       ├── README.md
│   │       ├── data/
│   │       │   └── dict/
│   │       │       └── syllable.lexicon
│   │       ├── local/
│   │       │   ├── data.sh
│   │       │   ├── gen_word2phone.py
│   │       │   └── reorganize_thchs30.py
│   │       ├── path.sh
│   │       └── run.sh
│   ├── timit/
│   │   ├── README.md
│   │   └── asr1/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── RESULTS.md
│   │       ├── conf/
│   │       │   ├── augmentation.json
│   │       │   ├── dev_spk.list
│   │       │   ├── preprocess.yaml
│   │       │   ├── test_spk.list
│   │       │   ├── transformer.yaml
│   │       │   └── tuning/
│   │       │       └── decode.yaml
│   │       ├── local/
│   │       │   ├── align.sh
│   │       │   ├── data.sh
│   │       │   ├── export.sh
│   │       │   ├── test.sh
│   │       │   ├── timit_data_prep.sh
│   │       │   ├── timit_norm_trans.pl
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── tiny/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── asr0/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   ├── deepspeech2.yaml
│   │   │   │   ├── deepspeech2_online.yaml
│   │   │   │   ├── preprocess.yaml
│   │   │   │   └── tuning/
│   │   │   │       ├── chunk_decode.yaml
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── download_lm_en.sh
│   │   │   │   ├── export.sh
│   │   │   │   ├── test.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   └── asr1/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── conf/
│   │       │   ├── augmentation.json
│   │       │   ├── chunk_confermer.yaml
│   │       │   ├── chunk_transformer.yaml
│   │       │   ├── conformer.yaml
│   │       │   ├── preprocess.yaml
│   │       │   ├── transformer.yaml
│   │       │   └── tuning/
│   │       │       ├── chunk_decode.yaml
│   │       │       └── decode.yaml
│   │       ├── local/
│   │       │   ├── align.sh
│   │       │   ├── data.sh
│   │       │   ├── export.sh
│   │       │   ├── test.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── vctk/
│   │   ├── README.md
│   │   ├── ernie_sat/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── synthesize_e2e.sh
│   │   │   └── run.sh
│   │   ├── tts3/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── lite_predict.sh
│   │   │   │   ├── ort_predict.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── synthesize_e2e.sh
│   │   │   └── run.sh
│   │   ├── vc3/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── train.sh
│   │   │   │   └── voice_conversion.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   └── preprocess.sh
│   │   │   └── run.sh
│   │   └── voc5/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── default.yaml
│   │       └── run.sh
│   ├── voxceleb/
│   │   ├── README.md
│   │   └── sv0/
│   │       ├── README.md
│   │       ├── RESULT.md
│   │       ├── conf/
│   │       │   ├── ecapa_tdnn.yaml
│   │       │   └── ecapa_tdnn_small.yaml
│   │       ├── local/
│   │       │   ├── convert.sh
│   │       │   ├── data.sh
│   │       │   ├── data_prepare.py
│   │       │   ├── emb.sh
│   │       │   ├── make_rirs_noise_csv_dataset_from_json.py
│   │       │   ├── make_vox_csv_dataset_from_json.py
│   │       │   ├── make_voxceleb_kaldi_trial.py
│   │       │   ├── test.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── wenetspeech/
│   │   ├── README.md
│   │   ├── asr0/
│   │   │   └── RESULTS.md
│   │   └── asr1/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── RESULTS.md
│   │       ├── conf/
│   │       │   ├── chunk_conformer.yaml
│   │       │   ├── chunk_conformer_u2pp.yaml
│   │       │   ├── conformer.yaml
│   │       │   ├── preprocess.yaml
│   │       │   └── tuning/
│   │       │       ├── chunk_decode.yaml
│   │       │       └── decode.yaml
│   │       ├── local/
│   │       │   ├── data.sh
│   │       │   ├── export.sh
│   │       │   ├── extract_meta.py
│   │       │   ├── process_opus.py
│   │       │   ├── quant.sh
│   │       │   ├── test.sh
│   │       │   ├── test_wav.sh
│   │       │   ├── train.sh
│   │       │   └── wenetspeech_data_prep.sh
│   │       ├── path.sh
│   │       └── run.sh
│   └── zh_en_tts/
│       └── tts3/
│           ├── .gitignore
│           ├── README.md
│           ├── conf/
│           │   └── default.yaml
│           ├── local/
│           │   ├── inference.sh
│           │   ├── mfa_download.sh
│           │   ├── model_download.sh
│           │   ├── ort_predict.sh
│           │   ├── preprocess.sh
│           │   ├── synthesize.sh
│           │   └── synthesize_e2e.sh
│           └── run.sh
├── paddlespeech/
│   ├── __init__.py
│   ├── audio/
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── backends/
│   │   │   ├── __init__.py
│   │   │   ├── common.py
│   │   │   └── soundfile_backend.py
│   │   ├── compliance/
│   │   │   ├── __init__.py
│   │   │   ├── kaldi.py
│   │   │   └── librosa.py
│   │   ├── datasets/
│   │   │   ├── __init__.py
│   │   │   ├── dataset.py
│   │   │   ├── esc50.py
│   │   │   └── voxceleb.py
│   │   ├── functional/
│   │   │   ├── __init__.py
│   │   │   ├── functional.py
│   │   │   └── window.py
│   │   ├── streamdata/
│   │   │   ├── __init__.py
│   │   │   ├── autodecode.py
│   │   │   ├── cache.py
│   │   │   ├── compat.py
│   │   │   ├── extradatasets.py
│   │   │   ├── filters.py
│   │   │   ├── gopen.py
│   │   │   ├── handlers.py
│   │   │   ├── mix.py
│   │   │   ├── paddle_utils.py
│   │   │   ├── pipeline.py
│   │   │   ├── shardlists.py
│   │   │   ├── soundfile.py
│   │   │   ├── tariterators.py
│   │   │   ├── utils.py
│   │   │   └── writer.py
│   │   ├── text/
│   │   │   ├── __init__.py
│   │   │   ├── text_featurizer.py
│   │   │   └── utility.py
│   │   ├── transform/
│   │   │   ├── __init__.py
│   │   │   ├── add_deltas.py
│   │   │   ├── channel_selector.py
│   │   │   ├── cmvn.py
│   │   │   ├── functional.py
│   │   │   ├── perturb.py
│   │   │   ├── spec_augment.py
│   │   │   ├── spectrogram.py
│   │   │   ├── transform_interface.py
│   │   │   ├── transformation.py
│   │   │   └── wpe.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── check_kwargs.py
│   │       ├── download.py
│   │       ├── dynamic_import.py
│   │       ├── error.py
│   │       ├── log.py
│   │       ├── numeric.py
│   │       ├── tensor_utils.py
│   │       └── time.py
│   ├── audiotools/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── core/
│   │   │   ├── __init__.py
│   │   │   ├── _julius.py
│   │   │   ├── audio_signal.py
│   │   │   ├── display.py
│   │   │   ├── dsp.py
│   │   │   ├── effects.py
│   │   │   ├── ffmpeg.py
│   │   │   ├── loudness.py
│   │   │   └── util.py
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   ├── datasets.py
│   │   │   ├── preprocess.py
│   │   │   └── transforms.py
│   │   ├── metrics/
│   │   │   ├── __init__.py
│   │   │   └── quality.py
│   │   ├── ml/
│   │   │   ├── __init__.py
│   │   │   ├── accelerator.py
│   │   │   ├── basemodel.py
│   │   │   └── decorators.py
│   │   └── post.py
│   ├── cli/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── __init__.py
│   │   ├── asr/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── base_commands.py
│   │   ├── cls/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── download.py
│   │   ├── entry.py
│   │   ├── executor.py
│   │   ├── kws/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── log.py
│   │   ├── ssl/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── st/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── text/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── tts/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── utils.py
│   │   ├── vector/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   └── whisper/
│   │       ├── __init__.py
│   │       └── infer.py
│   ├── cls/
│   │   ├── __init__.py
│   │   ├── exps/
│   │   │   ├── __init__.py
│   │   │   └── panns/
│   │   │       ├── __init__.py
│   │   │       ├── deploy/
│   │   │       │   ├── __init__.py
│   │   │       │   └── predict.py
│   │   │       ├── export_model.py
│   │   │       ├── predict.py
│   │   │       └── train.py
│   │   └── models/
│   │       ├── __init__.py
│   │       └── panns/
│   │           ├── __init__.py
│   │           ├── classifier.py
│   │           └── panns.py
│   ├── dataset/
│   │   ├── __init__.py
│   │   ├── aidatatang_200zh/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── aidatatang_200zh.py
│   │   ├── aishell/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── aishell.py
│   │   ├── download.py
│   │   └── s2t/
│   │       ├── __init__.py
│   │       ├── avg_model.py
│   │       ├── build_vocab.py
│   │       ├── compute_mean_std.py
│   │       ├── compute_wer.py
│   │       ├── format_data.py
│   │       └── format_rsl.py
│   ├── kws/
│   │   ├── __init__.py
│   │   ├── exps/
│   │   │   ├── __init__.py
│   │   │   └── mdtc/
│   │   │       ├── __init__.py
│   │   │       ├── collate.py
│   │   │       ├── compute_det.py
│   │   │       ├── plot_det_curve.py
│   │   │       ├── score.py
│   │   │       └── train.py
│   │   └── models/
│   │       ├── __init__.py
│   │       ├── loss.py
│   │       └── mdtc.py
│   ├── resource/
│   │   ├── __init__.py
│   │   ├── model_alias.py
│   │   ├── pretrained_models.py
│   │   └── resource.py
│   ├── s2t/
│   │   ├── __init__.py
│   │   ├── decoders/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── beam_search/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── batch_beam_search.py
│   │   │   │   └── beam_search.py
│   │   │   ├── ctcdecoder/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── decoders_deprecated.py
│   │   │   │   ├── scorer_deprecated.py
│   │   │   │   ├── swig_wrapper.py
│   │   │   │   └── tests/
│   │   │   │       └── test_decoders.py
│   │   │   ├── recog.py
│   │   │   ├── recog_bin.py
│   │   │   ├── scorers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── ctc.py
│   │   │   │   ├── ctc_prefix_score.py
│   │   │   │   ├── length_bonus.py
│   │   │   │   ├── ngram.py
│   │   │   │   └── scorer_interface.py
│   │   │   └── utils.py
│   │   ├── exps/
│   │   │   ├── __init__.py
│   │   │   ├── deepspeech2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── deploy/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── client.py
│   │   │   │   │   │   ├── record.py
│   │   │   │   │   │   ├── runtime.py
│   │   │   │   │   │   ├── send.py
│   │   │   │   │   │   └── server.py
│   │   │   │   │   ├── export.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   ├── test_export.py
│   │   │   │   │   ├── test_wav.py
│   │   │   │   │   └── train.py
│   │   │   │   └── model.py
│   │   │   ├── hubert/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   ├── test_wav.py
│   │   │   │   │   └── train.py
│   │   │   │   └── model.py
│   │   │   ├── lm/
│   │   │   │   └── transformer/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── bin/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   └── cacu_perplexity.py
│   │   │   │       └── lm_cacu_perplexity.py
│   │   │   ├── u2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── alignment.py
│   │   │   │   │   ├── export.py
│   │   │   │   │   ├── quant.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   ├── test_wav.py
│   │   │   │   │   └── train.py
│   │   │   │   ├── model.py
│   │   │   │   └── trainer.py
│   │   │   ├── u2_kaldi/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── recog.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   └── train.py
│   │   │   │   └── model.py
│   │   │   ├── u2_st/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── export.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   └── train.py
│   │   │   │   └── model.py
│   │   │   ├── wav2vec2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   ├── test_wav.py
│   │   │   │   │   └── train.py
│   │   │   │   └── model.py
│   │   │   ├── wavlm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   ├── test_wav.py
│   │   │   │   │   └── train.py
│   │   │   │   └── model.py
│   │   │   └── whisper/
│   │   │       └── test_wav.py
│   │   ├── frontend/
│   │   │   ├── __init__.py
│   │   │   ├── audio.py
│   │   │   ├── augmentor/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── augmentation.py
│   │   │   │   ├── base.py
│   │   │   │   ├── impulse_response.py
│   │   │   │   ├── noise_perturb.py
│   │   │   │   ├── online_bayesian_normalization.py
│   │   │   │   ├── resample.py
│   │   │   │   ├── shift_perturb.py
│   │   │   │   ├── spec_augment.py
│   │   │   │   ├── speed_perturb.py
│   │   │   │   └── volume_perturb.py
│   │   │   ├── featurizer/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── audio_featurizer.py
│   │   │   │   ├── speech_featurizer.py
│   │   │   │   └── text_featurizer.py
│   │   │   ├── normalizer.py
│   │   │   ├── speech.py
│   │   │   └── utility.py
│   │   ├── io/
│   │   │   ├── __init__.py
│   │   │   ├── batchfy.py
│   │   │   ├── collator.py
│   │   │   ├── converter.py
│   │   │   ├── dataloader.py
│   │   │   ├── dataset.py
│   │   │   ├── reader.py
│   │   │   ├── sampler.py
│   │   │   ├── speechbrain/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── batch.py
│   │   │   │   ├── data_pipeline.py
│   │   │   │   ├── data_utils.py
│   │   │   │   ├── dataio.py
│   │   │   │   ├── dataloader.py
│   │   │   │   ├── dataset.py
│   │   │   │   ├── depgraph.py
│   │   │   │   ├── make_dataloader.py
│   │   │   │   ├── sampler.py
│   │   │   │   └── sb_pipeline.py
│   │   │   └── utility.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── asr_interface.py
│   │   │   ├── ds2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── conv.py
│   │   │   │   └── deepspeech2.py
│   │   │   ├── hubert/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hubert_ASR.py
│   │   │   │   └── modules/
│   │   │   │       ├── __init__.py
│   │   │   │       └── hubert_model.py
│   │   │   ├── lm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dataset.py
│   │   │   │   └── transformer.py
│   │   │   ├── lm_interface.py
│   │   │   ├── st_interface.py
│   │   │   ├── u2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── u2.py
│   │   │   │   └── updater.py
│   │   │   ├── u2_st/
│   │   │   │   ├── __init__.py
│   │   │   │   └── u2_st.py
│   │   │   ├── wav2vec2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── modules/
│   │   │   │   │   ├── VanillaNN.py
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── activations.py
│   │   │   │   │   ├── containers.py
│   │   │   │   │   ├── linear.py
│   │   │   │   │   ├── modeling_outputs.py
│   │   │   │   │   ├── modeling_wav2vec2.py
│   │   │   │   │   ├── normalization.py
│   │   │   │   │   └── wav2vec2_model.py
│   │   │   │   ├── processing/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── signal_processing.py
│   │   │   │   │   └── speech_augmentation.py
│   │   │   │   └── wav2vec2_ASR.py
│   │   │   ├── wavlm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── modules/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── activations.py
│   │   │   │   │   ├── functional.py
│   │   │   │   │   └── modules.py
│   │   │   │   ├── wavlm_asr.py
│   │   │   │   └── wavlm_paddle.py
│   │   │   └── whisper/
│   │   │       ├── __init__.py
│   │   │       ├── tokenizer.py
│   │   │       ├── utils.py
│   │   │       ├── whisper.py
│   │   │       └── whisper_LICENSE
│   │   ├── modules/
│   │   │   ├── __init__.py
│   │   │   ├── activation.py
│   │   │   ├── align.py
│   │   │   ├── attention.py
│   │   │   ├── cmvn.py
│   │   │   ├── conformer_convolution.py
│   │   │   ├── conv2d.py
│   │   │   ├── crf.py
│   │   │   ├── ctc.py
│   │   │   ├── decoder.py
│   │   │   ├── decoder_layer.py
│   │   │   ├── embedding.py
│   │   │   ├── encoder.py
│   │   │   ├── encoder_layer.py
│   │   │   ├── fbank.py
│   │   │   ├── initializer.py
│   │   │   ├── loss.py
│   │   │   ├── mask.py
│   │   │   ├── positionwise_feed_forward.py
│   │   │   ├── subsampling.py
│   │   │   └── time_reduction.py
│   │   ├── training/
│   │   │   ├── __init__.py
│   │   │   ├── cli.py
│   │   │   ├── extensions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── evaluator.py
│   │   │   │   ├── extension.py
│   │   │   │   ├── plot.py
│   │   │   │   ├── snapshot.py
│   │   │   │   └── visualizer.py
│   │   │   ├── optimizer/
│   │   │   │   ├── __init__.py
│   │   │   │   └── adadelta.py
│   │   │   ├── reporter.py
│   │   │   ├── scheduler.py
│   │   │   ├── timer.py
│   │   │   ├── trainer.py
│   │   │   ├── triggers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── compare_value_trigger.py
│   │   │   │   ├── interval_trigger.py
│   │   │   │   ├── limit_trigger.py
│   │   │   │   ├── time_trigger.py
│   │   │   │   └── utils.py
│   │   │   └── updaters/
│   │   │       ├── __init__.py
│   │   │       ├── standard_updater.py
│   │   │       ├── trainer.py
│   │   │       └── updater.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── asr_utils.py
│   │       ├── bleu_score.py
│   │       ├── check_kwargs.py
│   │       ├── checkpoint.py
│   │       ├── cli_readers.py
│   │       ├── cli_utils.py
│   │       ├── cli_writers.py
│   │       ├── ctc_utils.py
│   │       ├── dynamic_import.py
│   │       ├── dynamic_pip_install.py
│   │       ├── error_rate.py
│   │       ├── layer_tools.py
│   │       ├── log.py
│   │       ├── mp_tools.py
│   │       ├── profiler.py
│   │       ├── socket_server.py
│   │       ├── spec_augment.py
│   │       ├── tensor_utils.py
│   │       ├── text_grid.py
│   │       └── utility.py
│   ├── server/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── __init__.py
│   │   ├── base_commands.py
│   │   ├── bin/
│   │   │   ├── __init__.py
│   │   │   ├── paddlespeech_client.py
│   │   │   └── paddlespeech_server.py
│   │   ├── conf/
│   │   │   ├── application.yaml
│   │   │   ├── tts_online_application.yaml
│   │   │   ├── vector_application.yaml
│   │   │   ├── ws_conformer_application.yaml
│   │   │   ├── ws_conformer_wenetspeech_application_faster.yaml
│   │   │   └── ws_ds2_application.yaml
│   │   ├── engine/
│   │   │   ├── __init__.py
│   │   │   ├── acs/
│   │   │   │   ├── __init__.py
│   │   │   │   └── python/
│   │   │   │       ├── __init__.py
│   │   │   │       └── acs_engine.py
│   │   │   ├── asr/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── online/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── ctc_endpoint.py
│   │   │   │   │   ├── ctc_search.py
│   │   │   │   │   ├── onnx/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── asr_engine.py
│   │   │   │   │   ├── paddleinference/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── asr_engine.py
│   │   │   │   │   └── python/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── asr_engine.py
│   │   │   │   ├── paddleinference/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── asr_engine.py
│   │   │   │   └── python/
│   │   │   │       ├── __init__.py
│   │   │   │       └── asr_engine.py
│   │   │   ├── base_engine.py
│   │   │   ├── cls/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── paddleinference/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── cls_engine.py
│   │   │   │   └── python/
│   │   │   │       ├── __init__.py
│   │   │   │       └── cls_engine.py
│   │   │   ├── engine_factory.py
│   │   │   ├── engine_pool.py
│   │   │   ├── engine_warmup.py
│   │   │   ├── text/
│   │   │   │   ├── __init__.py
│   │   │   │   └── python/
│   │   │   │       ├── __init__.py
│   │   │   │       └── text_engine.py
│   │   │   ├── tts/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── online/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── onnx/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── tts_engine.py
│   │   │   │   │   └── python/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── tts_engine.py
│   │   │   │   ├── paddleinference/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── tts_engine.py
│   │   │   │   └── python/
│   │   │   │       ├── __init__.py
│   │   │   │       └── tts_engine.py
│   │   │   └── vector/
│   │   │       ├── __init__.py
│   │   │       └── python/
│   │   │           ├── __init__.py
│   │   │           └── vector_engine.py
│   │   ├── entry.py
│   │   ├── executor.py
│   │   ├── restful/
│   │   │   ├── __init__.py
│   │   │   ├── acs_api.py
│   │   │   ├── api.py
│   │   │   ├── asr_api.py
│   │   │   ├── cls_api.py
│   │   │   ├── request.py
│   │   │   ├── response.py
│   │   │   ├── text_api.py
│   │   │   ├── tts_api.py
│   │   │   └── vector_api.py
│   │   ├── tests/
│   │   │   ├── __init__.py
│   │   │   ├── asr/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── offline/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── http_client.py
│   │   │   │   └── online/
│   │   │   │       ├── README.md
│   │   │   │       ├── README_cn.md
│   │   │   │       └── microphone_client.py
│   │   │   ├── text/
│   │   │   │   └── http_client.py
│   │   │   └── tts/
│   │   │       ├── offline/
│   │   │       │   └── http_client.py
│   │   │       └── online/
│   │   │           ├── http_client.py
│   │   │           └── ws_client.py
│   │   ├── util.py
│   │   ├── utils/
│   │   │   ├── __init__.py
│   │   │   ├── audio_handler.py
│   │   │   ├── audio_process.py
│   │   │   ├── buffer.py
│   │   │   ├── config.py
│   │   │   ├── errors.py
│   │   │   ├── exception.py
│   │   │   ├── onnx_infer.py
│   │   │   ├── paddle_predictor.py
│   │   │   ├── util.py
│   │   │   └── vad.py
│   │   └── ws/
│   │       ├── __init__.py
│   │       ├── api.py
│   │       ├── asr_api.py
│   │       └── tts_api.py
│   ├── t2s/
│   │   ├── __init__.py
│   │   ├── assets/
│   │   │   ├── __init__.py
│   │   │   ├── csmsc_test.txt
│   │   │   ├── sentences.txt
│   │   │   ├── sentences_canton.txt
│   │   │   ├── sentences_en.txt
│   │   │   ├── sentences_mix.txt
│   │   │   ├── sentences_sing.txt
│   │   │   └── sentences_ssml.txt
│   │   ├── audio/
│   │   │   ├── __init__.py
│   │   │   ├── audio.py
│   │   │   ├── codec.py
│   │   │   └── spec_normalizer.py
│   │   ├── datasets/
│   │   │   ├── __init__.py
│   │   │   ├── am_batch_fn.py
│   │   │   ├── batch.py
│   │   │   ├── data_table.py
│   │   │   ├── dataset.py
│   │   │   ├── get_feats.py
│   │   │   ├── ljspeech.py
│   │   │   ├── preprocess_utils.py
│   │   │   ├── sampler.py
│   │   │   └── vocoder_batch_fn.py
│   │   ├── exps/
│   │   │   ├── PTQ_dynamic.py
│   │   │   ├── PTQ_static.py
│   │   │   ├── __init__.py
│   │   │   ├── diffsinger/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gen_gta_mel.py
│   │   │   │   ├── get_minmax.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   └── train.py
│   │   │   ├── dygraph_to_static.py
│   │   │   ├── ernie_sat/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── align.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── synthesize.py
│   │   │   │   ├── synthesize_e2e.py
│   │   │   │   ├── train.py
│   │   │   │   └── utils.py
│   │   │   ├── fastspeech2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gen_gta_mel.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── train.py
│   │   │   │   └── vc2_infer.py
│   │   │   ├── gan_vocoder/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hifigan/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── train.py
│   │   │   │   ├── multi_band_melgan/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── train.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── parallelwave_gan/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── synthesize_from_wav.py
│   │   │   │   │   └── train.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── style_melgan/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── train.py
│   │   │   │   └── synthesize.py
│   │   │   ├── inference.py
│   │   │   ├── inference_streaming.py
│   │   │   ├── jets/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── synthesize.py
│   │   │   │   ├── synthesize_e2e.py
│   │   │   │   └── train.py
│   │   │   ├── lite_predict.py
│   │   │   ├── lite_predict_streaming.py
│   │   │   ├── lite_syn_utils.py
│   │   │   ├── ort_predict.py
│   │   │   ├── ort_predict_e2e.py
│   │   │   ├── ort_predict_streaming.py
│   │   │   ├── speedyspeech/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gen_gta_mel.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── synthesize_e2e.py
│   │   │   │   └── train.py
│   │   │   ├── starganv2_vc/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── train.py
│   │   │   │   └── vc.py
│   │   │   ├── stream_play_tts.py
│   │   │   ├── syn_utils.py
│   │   │   ├── synthesize.py
│   │   │   ├── synthesize_e2e.py
│   │   │   ├── synthesize_streaming.py
│   │   │   ├── tacotron2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── preprocess.py
│   │   │   │   └── train.py
│   │   │   ├── transformer_tts/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── synthesize.py
│   │   │   │   ├── synthesize_e2e.py
│   │   │   │   └── train.py
│   │   │   ├── vits/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── lite_predict.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── synthesize.py
│   │   │   │   ├── synthesize_e2e.py
│   │   │   │   ├── train.py
│   │   │   │   └── voice_cloning.py
│   │   │   ├── voice_cloning.py
│   │   │   ├── waveflow/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── config.py
│   │   │   │   ├── ljspeech.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── synthesize.py
│   │   │   │   └── train.py
│   │   │   └── wavernn/
│   │   │       ├── __init__.py
│   │   │       ├── synthesize.py
│   │   │       └── train.py
│   │   ├── frontend/
│   │   │   ├── __init__.py
│   │   │   ├── arpabet.py
│   │   │   ├── canton_frontend.py
│   │   │   ├── en_frontend.py
│   │   │   ├── g2pw/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dataset.py
│   │   │   │   ├── onnx_api.py
│   │   │   │   └── utils.py
│   │   │   ├── generate_lexicon.py
│   │   │   ├── mix_frontend.py
│   │   │   ├── normalizer/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── abbrrviation.py
│   │   │   │   ├── acronyms.py
│   │   │   │   ├── normalizer.py
│   │   │   │   ├── numbers.py
│   │   │   │   └── width.py
│   │   │   ├── phonectic.py
│   │   │   ├── polyphonic.py
│   │   │   ├── polyphonic.yaml
│   │   │   ├── punctuation.py
│   │   │   ├── rhy_prediction/
│   │   │   │   ├── __init__.py
│   │   │   │   └── rhy_predictor.py
│   │   │   ├── sing_frontend.py
│   │   │   ├── ssml/
│   │   │   │   ├── __init__.py
│   │   │   │   └── xml_processor.py
│   │   │   ├── tone_sandhi.py
│   │   │   ├── vocab.py
│   │   │   ├── zh_frontend.py
│   │   │   └── zh_normalization/
│   │   │       ├── README.md
│   │   │       ├── __init__.py
│   │   │       ├── char_convert.py
│   │   │       ├── chronology.py
│   │   │       ├── constants.py
│   │   │       ├── num.py
│   │   │       ├── phonecode.py
│   │   │       ├── quantifier.py
│   │   │       └── text_normlization.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── diffsinger/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── diffsinger.py
│   │   │   │   ├── diffsinger_updater.py
│   │   │   │   └── fastspeech2midi.py
│   │   │   ├── ernie_sat/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── ernie_sat.py
│   │   │   │   └── ernie_sat_updater.py
│   │   │   ├── fastspeech2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── fastspeech2.py
│   │   │   │   └── fastspeech2_updater.py
│   │   │   ├── hifigan/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hifigan.py
│   │   │   │   └── hifigan_updater.py
│   │   │   ├── jets/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── alignments.py
│   │   │   │   ├── generator.py
│   │   │   │   ├── jets.py
│   │   │   │   ├── jets_updater.py
│   │   │   │   └── length_regulator.py
│   │   │   ├── melgan/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── melgan.py
│   │   │   │   ├── multi_band_melgan_updater.py
│   │   │   │   ├── style_melgan.py
│   │   │   │   └── style_melgan_updater.py
│   │   │   ├── parallel_wavegan/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── parallel_wavegan.py
│   │   │   │   └── parallel_wavegan_updater.py
│   │   │   ├── speedyspeech/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── speedyspeech.py
│   │   │   │   └── speedyspeech_updater.py
│   │   │   ├── starganv2_vc/
│   │   │   │   ├── AuxiliaryASR/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── config.yml
│   │   │   │   │   ├── layers.py
│   │   │   │   │   └── model.py
│   │   │   │   ├── JDCNet/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── model.py
│   │   │   │   ├── __init__.py
│   │   │   │   ├── losses.py
│   │   │   │   ├── starganv2_vc.py
│   │   │   │   ├── starganv2_vc_updater.py
│   │   │   │   └── transforms.py
│   │   │   ├── tacotron2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── tacotron2.py
│   │   │   │   └── tacotron2_updater.py
│   │   │   ├── transformer_tts/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── transformer_tts.py
│   │   │   │   └── transformer_tts_updater.py
│   │   │   ├── vits/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── duration_predictor.py
│   │   │   │   ├── flow.py
│   │   │   │   ├── generator.py
│   │   │   │   ├── monotonic_align/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── core.pyx
│   │   │   │   │   └── setup.py
│   │   │   │   ├── posterior_encoder.py
│   │   │   │   ├── residual_coupling.py
│   │   │   │   ├── text_encoder.py
│   │   │   │   ├── transform.py
│   │   │   │   ├── vits.py
│   │   │   │   ├── vits_updater.py
│   │   │   │   └── wavenet/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── residual_block.py
│   │   │   │       └── wavenet.py
│   │   │   ├── waveflow.py
│   │   │   └── wavernn/
│   │   │       ├── __init__.py
│   │   │       ├── wavernn.py
│   │   │       └── wavernn_updater.py
│   │   ├── modules/
│   │   │   ├── __init__.py
│   │   │   ├── activation.py
│   │   │   ├── adversarial_loss/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gradient_reversal.py
│   │   │   │   └── speaker_classifier.py
│   │   │   ├── causal_conv.py
│   │   │   ├── conformer/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── convolution.py
│   │   │   │   └── encoder_layer.py
│   │   │   ├── conv.py
│   │   │   ├── diffnet.py
│   │   │   ├── diffusion.py
│   │   │   ├── fftconv1d.py
│   │   │   ├── geometry.py
│   │   │   ├── layer_norm.py
│   │   │   ├── losses.py
│   │   │   ├── masked_fill.py
│   │   │   ├── nets_utils.py
│   │   │   ├── normalizer.py
│   │   │   ├── positional_encoding.py
│   │   │   ├── pqmf.py
│   │   │   ├── predictor/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── duration_predictor.py
│   │   │   │   ├── length_regulator.py
│   │   │   │   └── variance_predictor.py
│   │   │   ├── residual_block.py
│   │   │   ├── residual_stack.py
│   │   │   ├── style_encoder.py
│   │   │   ├── tacotron2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── attentions.py
│   │   │   │   ├── decoder.py
│   │   │   │   └── encoder.py
│   │   │   ├── tade_res_block.py
│   │   │   ├── transformer/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── attention.py
│   │   │   │   ├── decoder.py
│   │   │   │   ├── decoder_layer.py
│   │   │   │   ├── embedding.py
│   │   │   │   ├── encoder.py
│   │   │   │   ├── encoder_layer.py
│   │   │   │   ├── lightconv.py
│   │   │   │   ├── mask.py
│   │   │   │   ├── multi_layer_conv.py
│   │   │   │   ├── positionwise_feed_forward.py
│   │   │   │   ├── repeat.py
│   │   │   │   └── subsampling.py
│   │   │   ├── upsample.py
│   │   │   └── wavenet_denoiser.py
│   │   ├── training/
│   │   │   ├── __init__.py
│   │   │   ├── cli.py
│   │   │   ├── default_config.py
│   │   │   ├── experiment.py
│   │   │   ├── extension.py
│   │   │   ├── extensions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── evaluator.py
│   │   │   │   ├── snapshot.py
│   │   │   │   └── visualizer.py
│   │   │   ├── optimizer.py
│   │   │   ├── reporter.py
│   │   │   ├── seeding.py
│   │   │   ├── trainer.py
│   │   │   ├── trigger.py
│   │   │   ├── triggers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── interval_trigger.py
│   │   │   │   ├── limit_trigger.py
│   │   │   │   └── time_trigger.py
│   │   │   ├── updater.py
│   │   │   └── updaters/
│   │   │       ├── __init__.py
│   │   │       └── standard_updater.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── checkpoint.py
│   │       ├── display.py
│   │       ├── error_rate.py
│   │       ├── h5_utils.py
│   │       ├── internals.py
│   │       ├── layer_tools.py
│   │       ├── mp_tools.py
│   │       ├── profiler.py
│   │       └── scheduler.py
│   ├── text/
│   │   ├── __init__.py
│   │   ├── exps/
│   │   │   ├── __init__.py
│   │   │   └── ernie_linear/
│   │   │       ├── __init__.py
│   │   │       ├── avg_model.py
│   │   │       ├── punc_restore.py
│   │   │       ├── test.py
│   │   │       └── train.py
│   │   └── models/
│   │       ├── __init__.py
│   │       ├── ernie_crf/
│   │       │   ├── __init__.py
│   │       │   └── model.py
│   │       └── ernie_linear/
│   │           ├── __init__.py
│   │           ├── dataset.py
│   │           ├── ernie_linear.py
│   │           └── ernie_linear_updater.py
│   ├── utils/
│   │   ├── __init__.py
│   │   ├── argparse.py
│   │   ├── dynamic_import.py
│   │   ├── env.py
│   │   └── initialize.py
│   └── vector/
│       ├── __init__.py
│       ├── cluster/
│       │   ├── __init__.py
│       │   ├── diarization.py
│       │   └── plda.py
│       ├── exps/
│       │   ├── __init__.py
│       │   ├── ecapa_tdnn/
│       │   │   ├── extract_emb.py
│       │   │   ├── test.py
│       │   │   └── train.py
│       │   └── ge2e/
│       │       ├── __init__.py
│       │       ├── audio_processor.py
│       │       ├── config.py
│       │       ├── dataset_processors.py
│       │       ├── inference.py
│       │       ├── preprocess.py
│       │       ├── random_cycle.py
│       │       ├── speaker_verification_dataset.py
│       │       └── train.py
│       ├── io/
│       │   ├── __init__.py
│       │   ├── augment.py
│       │   ├── batch.py
│       │   ├── dataset.py
│       │   ├── dataset_from_json.py
│       │   ├── embedding_norm.py
│       │   └── signal_processing.py
│       ├── models/
│       │   ├── __init__.py
│       │   ├── ecapa_tdnn.py
│       │   └── lstm_speaker_encoder.py
│       ├── modules/
│       │   ├── __init__.py
│       │   ├── layer.py
│       │   ├── loss.py
│       │   └── sid_model.py
│       ├── training/
│       │   ├── __init__.py
│       │   ├── scheduler.py
│       │   └── seeding.py
│       └── utils/
│           ├── __init__.py
│           ├── time.py
│           └── vector_utils.py
├── runtime/
│   ├── .clang-format
│   ├── .gitignore
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── build.sh
│   ├── build_android.sh
│   ├── build_ios.sh
│   ├── cmake/
│   │   ├── EnableCMP0048.cmake
│   │   ├── EnableCMP0077.cmake
│   │   ├── FindGFortranLibs.cmake
│   │   ├── absl.cmake
│   │   ├── boost.cmake
│   │   ├── eigen.cmake
│   │   ├── fastdeploy.cmake
│   │   ├── gflags.cmake
│   │   ├── glog.cmake
│   │   ├── gtest.cmake
│   │   ├── kenlm.cmake
│   │   ├── libsndfile.cmake
│   │   ├── openblas.cmake
│   │   ├── openfst.cmake
│   │   ├── paddleinference.cmake
│   │   ├── pybind.cmake
│   │   ├── summary.cmake
│   │   └── system.cmake
│   ├── docker/
│   │   └── .gitkeep
│   ├── engine/
│   │   ├── CMakeLists.txt
│   │   ├── asr/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── decoder/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── common.h
│   │   │   │   ├── ctc_beam_search_opt.h
│   │   │   │   ├── ctc_prefix_beam_search_decoder.cc
│   │   │   │   ├── ctc_prefix_beam_search_decoder.h
│   │   │   │   ├── ctc_prefix_beam_search_decoder_main.cc
│   │   │   │   ├── ctc_prefix_beam_search_score.h
│   │   │   │   ├── ctc_tlg_decoder.cc
│   │   │   │   ├── ctc_tlg_decoder.h
│   │   │   │   ├── ctc_tlg_decoder_main.cc
│   │   │   │   ├── decoder_itf.h
│   │   │   │   └── param.h
│   │   │   ├── nnet/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── decodable.cc
│   │   │   │   ├── decodable.h
│   │   │   │   ├── nnet_itf.h
│   │   │   │   ├── nnet_producer.cc
│   │   │   │   ├── nnet_producer.h
│   │   │   │   ├── u2_nnet.cc
│   │   │   │   ├── u2_nnet.h
│   │   │   │   ├── u2_nnet_main.cc
│   │   │   │   ├── u2_nnet_thread_main.cc
│   │   │   │   ├── u2_onnx_nnet.cc
│   │   │   │   └── u2_onnx_nnet.h
│   │   │   ├── recognizer/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── recognizer.cc
│   │   │   │   ├── recognizer.h
│   │   │   │   ├── recognizer_batch_main.cc
│   │   │   │   ├── recognizer_batch_main2.cc
│   │   │   │   ├── recognizer_controller.cc
│   │   │   │   ├── recognizer_controller.h
│   │   │   │   ├── recognizer_controller_impl.cc
│   │   │   │   ├── recognizer_controller_impl.h
│   │   │   │   ├── recognizer_instance.cc
│   │   │   │   ├── recognizer_instance.h
│   │   │   │   ├── recognizer_main.cc
│   │   │   │   └── recognizer_resource.h
│   │   │   └── server/
│   │   │       ├── CMakeLists.txt
│   │   │       └── websocket/
│   │   │           ├── CMakeLists.txt
│   │   │           ├── websocket_client.cc
│   │   │           ├── websocket_client.h
│   │   │           ├── websocket_client_main.cc
│   │   │           ├── websocket_server.cc
│   │   │           ├── websocket_server.h
│   │   │           └── websocket_server_main.cc
│   │   ├── audio_classification/
│   │   │   ├── CMakeLists.txt
│   │   │   └── nnet/
│   │   │       ├── CMakeLists.txt
│   │   │       ├── panns_interface.cc
│   │   │       ├── panns_interface.h
│   │   │       ├── panns_nnet.cc
│   │   │       ├── panns_nnet.h
│   │   │       └── panns_nnet_main.cc
│   │   ├── codelab/
│   │   │   ├── CMakeLists.txt
│   │   │   └── README.md
│   │   ├── common/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── base/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── basic_types.h
│   │   │   │   ├── common.h
│   │   │   │   ├── config.h
│   │   │   │   ├── flags.h.in
│   │   │   │   ├── glog_utils.cc
│   │   │   │   ├── glog_utils.h
│   │   │   │   ├── log.h.in
│   │   │   │   ├── log_impl.cc
│   │   │   │   ├── log_impl.h
│   │   │   │   ├── macros.h
│   │   │   │   ├── safe_queue.h
│   │   │   │   ├── safe_queue_inl.h
│   │   │   │   └── thread_pool.h
│   │   │   ├── frontend/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── assembler.cc
│   │   │   │   ├── assembler.h
│   │   │   │   ├── audio_cache.cc
│   │   │   │   ├── audio_cache.h
│   │   │   │   ├── cmvn.cc
│   │   │   │   ├── cmvn.h
│   │   │   │   ├── compute_fbank_main.cc
│   │   │   │   ├── compute_linear_spectrogram_main.cc
│   │   │   │   ├── data_cache.h
│   │   │   │   ├── db_norm.cc
│   │   │   │   ├── db_norm.h
│   │   │   │   ├── fbank.h
│   │   │   │   ├── feature-fbank.cc
│   │   │   │   ├── feature-fbank.h
│   │   │   │   ├── feature-functions.cc
│   │   │   │   ├── feature-functions.h
│   │   │   │   ├── feature-window.cc
│   │   │   │   ├── feature-window.h
│   │   │   │   ├── feature_cache.cc
│   │   │   │   ├── feature_cache.h
│   │   │   │   ├── feature_common.h
│   │   │   │   ├── feature_common_inl.h
│   │   │   │   ├── feature_pipeline.cc
│   │   │   │   ├── feature_pipeline.h
│   │   │   │   ├── fftsg.c
│   │   │   │   ├── frontend_itf.h
│   │   │   │   ├── linear_spectrogram.cc
│   │   │   │   ├── linear_spectrogram.h
│   │   │   │   ├── mel-computations.cc
│   │   │   │   ├── mel-computations.h
│   │   │   │   ├── normalizer.h
│   │   │   │   ├── rfft.cc
│   │   │   │   ├── rfft.h
│   │   │   │   ├── wave-reader.cc
│   │   │   │   └── wave-reader.h
│   │   │   ├── matrix/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── kaldi-matrix-inl.h
│   │   │   │   ├── kaldi-matrix.cc
│   │   │   │   ├── kaldi-matrix.h
│   │   │   │   ├── kaldi-vector-inl.h
│   │   │   │   ├── kaldi-vector.cc
│   │   │   │   ├── kaldi-vector.h
│   │   │   │   └── matrix-common.h
│   │   │   └── utils/
│   │   │       ├── CMakeLists.txt
│   │   │       ├── audio_process.cc
│   │   │       ├── audio_process.h
│   │   │       ├── blank_process_test.cc
│   │   │       ├── file_utils.cc
│   │   │       ├── file_utils.h
│   │   │       ├── math.cc
│   │   │       ├── math.h
│   │   │       ├── picojson.h
│   │   │       ├── strings.cc
│   │   │       ├── strings.h
│   │   │       ├── strings_test.cc
│   │   │       ├── timer.cc
│   │   │       └── timer.h
│   │   ├── kaldi/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── base/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── io-funcs-inl.h
│   │   │   │   ├── io-funcs.cc
│   │   │   │   ├── io-funcs.h
│   │   │   │   ├── kaldi-common.h
│   │   │   │   ├── kaldi-error.cc
│   │   │   │   ├── kaldi-error.h
│   │   │   │   ├── kaldi-math.cc
│   │   │   │   ├── kaldi-math.h
│   │   │   │   ├── kaldi-types.h
│   │   │   │   ├── kaldi-utils.cc
│   │   │   │   ├── kaldi-utils.h
│   │   │   │   ├── timer.cc
│   │   │   │   ├── timer.h
│   │   │   │   └── version.h
│   │   │   ├── decoder/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── decodable-itf.h
│   │   │   │   ├── lattice-faster-decoder.cc
│   │   │   │   ├── lattice-faster-decoder.h
│   │   │   │   ├── lattice-faster-online-decoder.cc
│   │   │   │   └── lattice-faster-online-decoder.h
│   │   │   ├── fstbin/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── fstaddselfloops.cc
│   │   │   │   ├── fstdeterminizestar.cc
│   │   │   │   ├── fstisstochastic.cc
│   │   │   │   ├── fstminimizeencoded.cc
│   │   │   │   └── fsttablecompose.cc
│   │   │   ├── fstext/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── determinize-lattice-inl.h
│   │   │   │   ├── determinize-lattice.h
│   │   │   │   ├── determinize-star-inl.h
│   │   │   │   ├── determinize-star.h
│   │   │   │   ├── fstext-lib.h
│   │   │   │   ├── fstext-utils-inl.h
│   │   │   │   ├── fstext-utils.h
│   │   │   │   ├── kaldi-fst-io-inl.h
│   │   │   │   ├── kaldi-fst-io.cc
│   │   │   │   ├── kaldi-fst-io.h
│   │   │   │   ├── lattice-utils-inl.h
│   │   │   │   ├── lattice-utils.h
│   │   │   │   ├── lattice-weight.h
│   │   │   │   ├── pre-determinize-inl.h
│   │   │   │   ├── pre-determinize.h
│   │   │   │   ├── remove-eps-local-inl.h
│   │   │   │   ├── remove-eps-local.h
│   │   │   │   └── table-matcher.h
│   │   │   ├── lat/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── determinize-lattice-pruned.cc
│   │   │   │   ├── determinize-lattice-pruned.h
│   │   │   │   ├── kaldi-lattice.cc
│   │   │   │   ├── kaldi-lattice.h
│   │   │   │   ├── lattice-functions.cc
│   │   │   │   └── lattice-functions.h
│   │   │   ├── lm/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── arpa-file-parser.cc
│   │   │   │   ├── arpa-file-parser.h
│   │   │   │   ├── arpa-lm-compiler.cc
│   │   │   │   └── arpa-lm-compiler.h
│   │   │   ├── lmbin/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   └── arpa2fst.cc
│   │   │   └── util/
│   │   │       ├── CMakeLists.txt
│   │   │       ├── basic-filebuf.h
│   │   │       ├── common-utils.h
│   │   │       ├── const-integer-set-inl.h
│   │   │       ├── const-integer-set.h
│   │   │       ├── edit-distance-inl.h
│   │   │       ├── edit-distance.h
│   │   │       ├── hash-list-inl.h
│   │   │       ├── hash-list.h
│   │   │       ├── kaldi-cygwin-io-inl.h
│   │   │       ├── kaldi-holder-inl.h
│   │   │       ├── kaldi-holder.cc
│   │   │       ├── kaldi-holder.h
│   │   │       ├── kaldi-io-inl.h
│   │   │       ├── kaldi-io.cc
│   │   │       ├── kaldi-io.h
│   │   │       ├── kaldi-pipebuf.h
│   │   │       ├── kaldi-semaphore.cc
│   │   │       ├── kaldi-semaphore.h
│   │   │       ├── kaldi-table-inl.h
│   │   │       ├── kaldi-table.cc
│   │   │       ├── kaldi-table.h
│   │   │       ├── kaldi-thread.cc
│   │   │       ├── kaldi-thread.h
│   │   │       ├── options-itf.h
│   │   │       ├── parse-options.cc
│   │   │       ├── parse-options.h
│   │   │       ├── simple-io-funcs.cc
│   │   │       ├── simple-io-funcs.h
│   │   │       ├── simple-options.cc
│   │   │       ├── simple-options.h
│   │   │       ├── stl-utils.h
│   │   │       ├── table-types.h
│   │   │       ├── text-utils.cc
│   │   │       └── text-utils.h
│   │   └── vad/
│   │       ├── CMakeLists.txt
│   │       ├── frontend/
│   │       │   └── wav.h
│   │       ├── interface/
│   │       │   ├── CMakeLists.txt
│   │       │   ├── vad_interface.cc
│   │       │   ├── vad_interface.h
│   │       │   └── vad_interface_main.cc
│   │       └── nnet/
│   │           ├── CMakeLists.txt
│   │           ├── vad.cc
│   │           ├── vad.h
│   │           └── vad_nnet_main.cc
│   ├── examples/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── android/
│   │   │   └── VadJni/
│   │   │       ├── .gitignore
│   │   │       ├── app/
│   │   │       │   ├── .gitignore
│   │   │       │   ├── build.gradle
│   │   │       │   ├── libs/
│   │   │       │   │   └── .gitkeep
│   │   │       │   ├── proguard-rules.pro
│   │   │       │   └── src/
│   │   │       │       ├── androidTest/
│   │   │       │       │   └── java/
│   │   │       │       │       └── com/
│   │   │       │       │           └── baidu/
│   │   │       │       │               └── paddlespeech/
│   │   │       │       │                   └── vadjni/
│   │   │       │       │                       └── ExampleInstrumentedTest.java
│   │   │       │       └── main/
│   │   │       │           ├── AndroidManifest.xml
│   │   │       │           ├── assets/
│   │   │       │           │   └── .gitkeep
│   │   │       │           ├── cpp/
│   │   │       │           │   ├── CMakeLists.txt
│   │   │       │           │   ├── native-lib.cpp
│   │   │       │           │   └── vad_interface.h
│   │   │       │           ├── java/
│   │   │       │           │   └── com/
│   │   │       │           │       └── baidu/
│   │   │       │           │           └── paddlespeech/
│   │   │       │           │               └── vadjni/
│   │   │       │           │                   └── MainActivity.java
│   │   │       │           └── res/
│   │   │       │               ├── drawable/
│   │   │       │               │   └── ic_launcher_background.xml
│   │   │       │               ├── drawable-v24/
│   │   │       │               │   └── ic_launcher_foreground.xml
│   │   │       │               ├── layout/
│   │   │       │               │   └── activity_main.xml
│   │   │       │               ├── mipmap-anydpi-v26/
│   │   │       │               │   ├── ic_launcher.xml
│   │   │       │               │   └── ic_launcher_round.xml
│   │   │       │               ├── mipmap-anydpi-v33/
│   │   │       │               │   └── ic_launcher.xml
│   │   │       │               ├── values/
│   │   │       │               │   ├── colors.xml
│   │   │       │               │   ├── strings.xml
│   │   │       │               │   └── themes.xml
│   │   │       │               ├── values-night/
│   │   │       │               │   └── themes.xml
│   │   │       │               └── xml/
│   │   │       │                   ├── backup_rules.xml
│   │   │       │                   └── data_extraction_rules.xml
│   │   │       ├── build.gradle
│   │   │       ├── gradle/
│   │   │       │   └── wrapper/
│   │   │       │       ├── gradle-wrapper.jar
│   │   │       │       └── gradle-wrapper.properties
│   │   │       ├── gradle.properties
│   │   │       ├── gradlew
│   │   │       ├── gradlew.bat
│   │   │       └── settings.gradle
│   │   ├── audio_classification/
│   │   │   ├── README.md
│   │   │   ├── android_demo/
│   │   │   │   ├── .gitignore
│   │   │   │   ├── app/
│   │   │   │   │   ├── .gitignore
│   │   │   │   │   ├── build.gradle
│   │   │   │   │   ├── proguard-rules.pro
│   │   │   │   │   └── src/
│   │   │   │   │       ├── androidTest/
│   │   │   │   │       │   └── java/
│   │   │   │   │       │       └── com/
│   │   │   │   │       │           └── example/
│   │   │   │   │       │               └── cls/
│   │   │   │   │       │                   └── ExampleInstrumentedTest.kt
│   │   │   │   │       └── main/
│   │   │   │   │           ├── AndroidManifest.xml
│   │   │   │   │           ├── cpp/
│   │   │   │   │           │   ├── CMakeLists.txt
│   │   │   │   │           │   ├── includes/
│   │   │   │   │           │   │   └── panns_interface.h
│   │   │   │   │           │   └── native-lib.cpp
│   │   │   │   │           ├── java/
│   │   │   │   │           │   └── com/
│   │   │   │   │           │       └── example/
│   │   │   │   │           │           └── cls/
│   │   │   │   │           │               └── MainActivity.kt
│   │   │   │   │           └── res/
│   │   │   │   │               ├── drawable/
│   │   │   │   │               │   └── ic_launcher_background.xml
│   │   │   │   │               ├── drawable-v24/
│   │   │   │   │               │   └── ic_launcher_foreground.xml
│   │   │   │   │               ├── layout/
│   │   │   │   │               │   └── activity_main.xml
│   │   │   │   │               ├── mipmap-anydpi-v26/
│   │   │   │   │               │   ├── ic_launcher.xml
│   │   │   │   │               │   └── ic_launcher_round.xml
│   │   │   │   │               ├── values/
│   │   │   │   │               │   ├── colors.xml
│   │   │   │   │               │   ├── strings.xml
│   │   │   │   │               │   └── themes.xml
│   │   │   │   │               ├── values-night/
│   │   │   │   │               │   └── themes.xml
│   │   │   │   │               └── xml/
│   │   │   │   │                   ├── backup_rules.xml
│   │   │   │   │                   └── data_extraction_rules.xml
│   │   │   │   ├── build.gradle
│   │   │   │   ├── gradle/
│   │   │   │   │   └── wrapper/
│   │   │   │   │       ├── gradle-wrapper.jar
│   │   │   │   │       └── gradle-wrapper.properties
│   │   │   │   ├── gradle.properties
│   │   │   │   ├── gradlew
│   │   │   │   ├── gradlew.bat
│   │   │   │   └── settings.gradle
│   │   │   ├── conf
│   │   │   ├── label_list
│   │   │   └── scp
│   │   ├── codelab/
│   │   │   ├── README.md
│   │   │   ├── decoder/
│   │   │   │   ├── .gitignore
│   │   │   │   ├── README.md
│   │   │   │   ├── path.sh
│   │   │   │   ├── run.sh
│   │   │   │   └── valgrind.sh
│   │   │   ├── feat/
│   │   │   │   ├── .gitignore
│   │   │   │   ├── README.md
│   │   │   │   ├── path.sh
│   │   │   │   ├── run.sh
│   │   │   │   └── valgrind.sh
│   │   │   ├── nnet/
│   │   │   │   ├── .gitignore
│   │   │   │   ├── README.md
│   │   │   │   ├── path.sh
│   │   │   │   ├── run.sh
│   │   │   │   └── valgrind.sh
│   │   │   └── u2/
│   │   │       ├── .gitignore
│   │   │       ├── README.md
│   │   │       ├── local/
│   │   │       │   ├── decode.sh
│   │   │       │   ├── feat.sh
│   │   │       │   ├── nnet.sh
│   │   │       │   └── recognizer.sh
│   │   │       ├── path.sh
│   │   │       └── run.sh
│   │   ├── custom_asr/
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   ├── compile_lexicon_token_fst.sh
│   │   │   │   ├── mk_slot_graph.sh
│   │   │   │   ├── mk_tlg_with_slot.sh
│   │   │   │   └── train_lm_with_slot.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── text_lm/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   └── mmseg.py
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── u2pp_ol/
│   │   │   ├── README.md
│   │   │   └── wenetspeech/
│   │   │       ├── .gitignore
│   │   │       ├── README.md
│   │   │       ├── RESULTS.md
│   │   │       ├── local/
│   │   │       │   ├── aishell_train_lms.sh
│   │   │       │   ├── decode.sh
│   │   │       │   ├── feat.sh
│   │   │       │   ├── nnet.sh
│   │   │       │   ├── recognizer.sh
│   │   │       │   ├── recognizer_fastdeploy.sh
│   │   │       │   ├── recognizer_quant.sh
│   │   │       │   ├── recognizer_wfst.sh
│   │   │       │   ├── recognizer_wfst_fastdeploy.sh
│   │   │       │   ├── run_build_tlg.sh
│   │   │       │   └── split_data.sh
│   │   │       ├── path.sh
│   │   │       └── run.sh
│   │   └── vad/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── vad.ini
│   │       ├── local/
│   │       │   ├── build.sh
│   │       │   ├── build_android.sh
│   │       │   ├── decode.sh
│   │       │   └── download.sh
│   │       ├── path.sh
│   │       ├── run.sh
│   │       └── vad-android-demo/
│   │           ├── .gradle/
│   │           │   ├── 6.1.1/
│   │           │   │   └── gc.properties
│   │           │   ├── buildOutputCleanup/
│   │           │   │   └── cache.properties
│   │           │   └── vcs-1/
│   │           │       └── gc.properties
│   │           ├── LICENSE.md
│   │           ├── README
│   │           ├── README.md
│   │           ├── build.gradle
│   │           ├── example/
│   │           │   ├── .gitignore
│   │           │   ├── build.gradle
│   │           │   ├── local.properties
│   │           │   ├── proguard-rules.pro
│   │           │   └── src/
│   │           │       ├── androidTest/
│   │           │       │   └── java/
│   │           │       │       └── com/
│   │           │       │           └── konovalov/
│   │           │       │               └── vad/
│   │           │       │                   └── example/
│   │           │       │                       └── ExampleInstrumentedTest.java
│   │           │       └── main/
│   │           │           ├── AndroidManifest.xml
│   │           │           ├── java/
│   │           │           │   └── com/
│   │           │           │       └── konovalov/
│   │           │           │           └── vad/
│   │           │           │               └── example/
│   │           │           │                   ├── MainActivity.java
│   │           │           │                   └── recorder/
│   │           │           │                       ├── VoiceRecorder.java
│   │           │           │                       └── VoiceRecorderConfig.java
│   │           │           └── res/
│   │           │               ├── drawable/
│   │           │               │   └── ic_launcher_background.xml
│   │           │               ├── drawable-v24/
│   │           │               │   └── ic_launcher_foreground.xml
│   │           │               ├── layout/
│   │           │               │   └── activity_main.xml
│   │           │               ├── mipmap-anydpi-v26/
│   │           │               │   ├── ic_launcher.xml
│   │           │               │   └── ic_launcher_round.xml
│   │           │               └── values/
│   │           │                   ├── colors.xml
│   │           │                   ├── strings.xml
│   │           │                   └── styles.xml
│   │           ├── gradle/
│   │           │   └── wrapper/
│   │           │       ├── gradle-wrapper.jar
│   │           │       └── gradle-wrapper.properties
│   │           ├── gradle.properties
│   │           ├── gradlew
│   │           ├── gradlew.bat
│   │           ├── local.properties
│   │           ├── settings.gradle
│   │           └── vad/
│   │               ├── .gitignore
│   │               ├── build.gradle
│   │               ├── consumer-rules.pro
│   │               ├── proguard-rules.pro
│   │               └── src/
│   │                   ├── androidTest/
│   │                   │   └── java/
│   │                   │       └── com/
│   │                   │           └── konovalov/
│   │                   │               └── vad/
│   │                   │                   └── ExampleInstrumentedTest.java
│   │                   └── main/
│   │                       ├── AndroidManifest.xml
│   │                       ├── cpp/
│   │                       │   ├── CMakeLists.txt
│   │                       │   ├── includes/
│   │                       │   │   └── vad_interface.h
│   │                       │   └── native-lib.cpp
│   │                       ├── java/
│   │                       │   └── com/
│   │                       │       └── konovalov/
│   │                       │           └── vad/
│   │                       │               ├── Vad.java
│   │                       │               └── VadListener.java
│   │                       └── res/
│   │                           └── values/
│   │                               └── strings.xml
│   ├── patch/
│   │   ├── CPPLINT.cfg
│   │   ├── README.md
│   │   └── openfst/
│   │       └── src/
│   │           ├── include/
│   │           │   └── fst/
│   │           │       ├── flags.h
│   │           │       └── log.h
│   │           └── lib/
│   │               └── flags.cc
│   └── tools/
│       ├── clang-format.sh
│       ├── setup_valgrind.sh
│       └── venv.sh
├── setup.cfg
├── setup.py
├── tests/
│   ├── benchmark/
│   │   ├── conformer/
│   │   │   ├── README.md
│   │   │   ├── prepare.sh
│   │   │   ├── run.sh
│   │   │   └── run_benchmark.sh
│   │   └── pwgan/
│   │       ├── README.md
│   │       ├── run_all.sh
│   │       └── run_benchmark.sh
│   ├── chains/
│   │   ├── ds2/
│   │   │   ├── README.md
│   │   │   ├── ds2_params_lite_train_infer.txt
│   │   │   ├── ds2_params_whole_train_infer.txt
│   │   │   ├── lite_train_infer.sh
│   │   │   ├── prepare.sh
│   │   │   ├── speedyspeech_params_lite.txt
│   │   │   ├── test.sh
│   │   │   └── whole_train_infer.sh
│   │   └── speedyspeech/
│   │       ├── README.md
│   │       ├── infer.sh
│   │       ├── lite_train_infer.sh
│   │       ├── prepare.sh
│   │       ├── speedyspeech_params_lite_multi_gpu.txt
│   │       ├── speedyspeech_params_lite_single_gpu.txt
│   │       ├── speedyspeech_params_whole_multi_gpu.txt
│   │       ├── speedyspeech_params_whole_single_gpu.txt
│   │       ├── test.sh
│   │       └── whole_train_infer.sh
│   ├── test_tipc/
│   │   ├── barrier.sh
│   │   ├── benchmark_train.sh
│   │   ├── common_func.sh
│   │   ├── configs/
│   │   │   ├── conformer/
│   │   │   │   └── train_infer_python.txt
│   │   │   ├── mdtc/
│   │   │   │   └── train_infer_python.txt
│   │   │   └── pwgan/
│   │   │       └── train_infer_python.txt
│   │   ├── conformer/
│   │   │   └── scripts/
│   │   │       └── aishell_tiny.py
│   │   ├── docs/
│   │   │   └── benchmark_train.md
│   │   ├── prepare.sh
│   │   └── test_train_inference_python.sh
│   └── unit/
│       ├── asr/
│       │   ├── deepspeech2_model_test.py
│       │   ├── deepspeech2_online_model_test.py
│       │   ├── deepspeech2_online_model_test.sh
│       │   ├── error_rate_test.py
│       │   ├── mask_test.py
│       │   ├── reverse_pad_list.py
│       │   └── u2_model_test.py
│       ├── audiotools/
│       │   ├── core/
│       │   │   ├── test_audio_signal.py
│       │   │   ├── test_bands.py
│       │   │   ├── test_display.py
│       │   │   ├── test_dsp.py
│       │   │   ├── test_effects.py
│       │   │   ├── test_fftconv.py
│       │   │   ├── test_grad.py
│       │   │   ├── test_highpass.py
│       │   │   ├── test_loudness.py
│       │   │   ├── test_lowpass.py
│       │   │   └── test_util.py
│       │   ├── data/
│       │   │   ├── test_datasets.py
│       │   │   ├── test_preprocess.py
│       │   │   └── test_transforms.py
│       │   ├── ml/
│       │   │   ├── test_decorators.py
│       │   │   └── test_model.py
│       │   ├── test_audiotools.sh
│       │   └── test_post.py
│       ├── ci.sh
│       ├── cli/
│       │   ├── aishell_test_prepare.py
│       │   ├── calc_RTF_CER_by_aishell.sh
│       │   ├── path.sh
│       │   └── test_cli.sh
│       ├── doc/
│       │   └── test_cli.md
│       ├── server/
│       │   ├── offline/
│       │   │   ├── change_yaml.py
│       │   │   ├── conf/
│       │   │   │   └── application.yaml
│       │   │   └── test_server_client.sh
│       │   └── online/
│       │       └── tts/
│       │           ├── check_server/
│       │           │   ├── change_yaml.py
│       │           │   ├── conf/
│       │           │   │   └── application.yaml
│       │           │   ├── test.sh
│       │           │   ├── test_all.sh
│       │           │   └── tts_online_application.yaml
│       │           └── test_server/
│       │               └── test_http_client.py
│       ├── tts/
│       │   ├── test_data_table.py
│       │   ├── test_enfrontend.py
│       │   ├── test_expansion.py
│       │   ├── test_fftconv1d.py
│       │   ├── test_losses.py
│       │   ├── test_mixfrontend.py
│       │   ├── test_optimizer.py
│       │   ├── test_pwg.py
│       │   ├── test_raise.py
│       │   ├── test_reporter.py
│       │   ├── test_snapshot.py
│       │   ├── test_ssml.py
│       │   ├── test_stft.py
│       │   └── test_to_static.py
│       └── vector/
│           ├── conftest.py
│           └── test_augment.py
├── third_party/
│   ├── README.md
│   ├── __init__.py
│   ├── ctc_decoders/
│   │   ├── .gitignore
│   │   ├── COPYING.APACHE2.0
│   │   ├── COPYING.LESSER.3
│   │   ├── LICENSE
│   │   ├── __init__.py
│   │   ├── ctc_beam_search_decoder.cpp
│   │   ├── ctc_beam_search_decoder.h
│   │   ├── ctc_greedy_decoder.cpp
│   │   ├── ctc_greedy_decoder.h
│   │   ├── decoder_utils.cpp
│   │   ├── decoder_utils.h
│   │   ├── decoders.i
│   │   ├── path_trie.cpp
│   │   ├── path_trie.h
│   │   ├── scorer.cpp
│   │   ├── scorer.h
│   │   ├── setup.py
│   │   └── setup.sh
│   ├── install.sh
│   ├── install_win_ctc.bat
│   └── python_kaldi_features/
│       ├── .gitignore
│       ├── LICENSE
│       ├── MANIFEST
│       ├── README.rst
│       ├── docs/
│       │   ├── Makefile
│       │   ├── make.bat
│       │   └── source/
│       │       ├── conf.py
│       │       └── index.rst
│       ├── example.py
│       ├── python_speech_features/
│       │   ├── __init__.py
│       │   ├── base.py
│       │   ├── base_orig.py
│       │   ├── sigproc.py
│       │   └── sigproc_orig.py
│       ├── requirements.txt
│       ├── setup.py
│       └── test/
│           └── test_sigproc.py
├── tools/
│   ├── Dockerfile
│   ├── Makefile
│   ├── extras/
│   │   ├── README.md
│   │   ├── install_autolog.sh
│   │   ├── install_gcc.sh
│   │   ├── install_kaldi.sh
│   │   ├── install_kenlm.sh
│   │   ├── install_liblbfgs.sh
│   │   ├── install_mfa_v1.sh
│   │   ├── install_mfa_v2.sh
│   │   ├── install_miniconda.sh
│   │   ├── install_mkl.sh
│   │   ├── install_ngram.sh
│   │   ├── install_openblas.sh
│   │   ├── install_openfst.sh
│   │   ├── install_pynini.sh
│   │   ├── install_sclite.sh
│   │   ├── install_soundfile.sh
│   │   ├── install_sox.sh
│   │   ├── install_srilm.sh
│   │   ├── install_venv.sh
│   │   └── srilm.patch
│   ├── get_contributors.ipynb
│   ├── pre_commit.sh
│   ├── release_note.py
│   ├── setup_anaconda.sh
│   └── watermark.py
└── utils/
    ├── DER.py
    ├── README.md
    ├── __init__.py
    ├── addjson.py
    ├── apply-cmvn.py
    ├── avg.sh
    ├── avg_model.py
    ├── build_kenlm_model_from_arpa.sh
    ├── build_vocab.py
    ├── caculate_rtf.py
    ├── compute-cmvn-stats.py
    ├── compute-wer.py
    ├── compute_mean_std.py
    ├── compute_statistics.py
    ├── copy-feats.py
    ├── data2json.sh
    ├── dump.sh
    ├── dump_manifest.py
    ├── duration_from_maniefst.sh
    ├── espnet_json_to_manifest.py
    ├── feat-to-shape.py
    ├── feat_to_shape.sh
    ├── filter.py
    ├── filter_scp.pl
    ├── format_data.py
    ├── format_rsl.py
    ├── format_triplet_data.py
    ├── fst/
    │   ├── add_lex_disambig.pl
    │   ├── compile_lexicon_token_fst.sh
    │   ├── ctc_token_fst.py
    │   ├── ctc_token_fst_corrected.py
    │   ├── eps2disambig.pl
    │   ├── make_lexicon_fst.pl
    │   ├── make_tlg.sh
    │   ├── prepare_dict.py
    │   ├── remove_oovs.pl
    │   ├── rnnt_token_fst.py
    │   └── s2eps.pl
    ├── gen_duration_from_textgrid.py
    ├── generate_infer_yaml.py
    ├── json2trn.py
    ├── link_wav.py
    ├── log.sh
    ├── manifest_key_value.py
    ├── md-eval.pl
    ├── merge_scp2json.py
    ├── ngram_train.sh
    ├── pack_model.sh
    ├── parallel/
    │   └── run.pl
    ├── parse_options.sh
    ├── pd_env_collect.sh
    ├── profile.sh
    ├── reduce_data_dir.sh
    ├── remove_longshortdata.py
    ├── remove_longshortdata.sh
    ├── score_sclite.sh
    ├── scp2json.py
    ├── show_results.sh
    ├── spk2utt_to_utt2spk.pl
    ├── split_data.sh
    ├── split_json.sh
    ├── split_scp.pl
    ├── spm_decode
    ├── spm_encode
    ├── spm_train
    ├── tarball.sh
    ├── text2token.py
    ├── text_to_lexicon.py
    ├── tokenizer.perl
    ├── train_arpa_with_kenlm.sh
    ├── update_json.sh
    ├── utility.sh
    ├── utt2spk_to_spk2utt.pl
    └── zh_tn.py